https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/139527
>From 875c5fa90fff79456da734ae4be1e4590df6d127 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Thu, 1 May 2025 17:24:58 +0100 Subject: [PATCH 1/3] [libclc] Move sin, cos & sincos to CLC library This commit also vectorizes these builtins along the way. --- libclc/clc/include/clc/math/clc_cos.h | 19 ++ libclc/clc/include/clc/math/clc_sin.h | 19 ++ libclc/clc/include/clc/math/clc_sincos.h | 19 ++ .../clc/include/clc/math/clc_sincos_helpers.h | 7 + .../clc/math/clc_sincos_helpers_fp64.inc | 17 ++ libclc/clc/include/clc/math/tables.h | 2 +- libclc/clc/lib/generic/SOURCES | 3 + libclc/clc/lib/generic/math/clc_cos.cl | 21 ++ libclc/clc/lib/generic/math/clc_cos.inc | 63 ++++ libclc/clc/lib/generic/math/clc_sin.cl | 25 ++ libclc/clc/lib/generic/math/clc_sin.inc | 68 +++++ libclc/clc/lib/generic/math/clc_sincos.cl | 14 + .../lib/generic/math/clc_sincos.inc} | 8 +- .../lib/generic/math/clc_sincos_helpers.cl | 24 ++ .../generic/math/clc_sincos_helpers_fp64.inc | 234 ++++++++++++++ libclc/clc/lib/generic/math/clc_tables.cl | 62 ++++ libclc/clspv/lib/SOURCES | 2 - libclc/generic/include/clc/math/sincos.h | 4 +- libclc/generic/include/clc/math/sincos.inc | 14 - libclc/generic/lib/SOURCES | 2 - libclc/generic/lib/math/clc_tan.cl | 1 - libclc/generic/lib/math/cos.cl | 42 +-- libclc/generic/lib/math/cos.inc | 34 --- libclc/generic/lib/math/sin.cl | 41 +-- libclc/generic/lib/math/sin.inc | 38 --- libclc/generic/lib/math/sincos.cl | 4 +- libclc/generic/lib/math/sincos_helpers.cl | 285 ------------------ libclc/generic/lib/math/sincos_helpers.h | 24 -- libclc/generic/lib/math/tables.cl | 30 -- libclc/spirv/lib/SOURCES | 2 - 30 files changed, 612 insertions(+), 516 deletions(-) create mode 100644 libclc/clc/include/clc/math/clc_cos.h create mode 100644 libclc/clc/include/clc/math/clc_sin.h create mode 100644 libclc/clc/include/clc/math/clc_sincos.h create mode 100644 libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc create mode 100644 libclc/clc/lib/generic/math/clc_cos.cl create mode 100644 libclc/clc/lib/generic/math/clc_cos.inc create mode 100644 libclc/clc/lib/generic/math/clc_sin.cl create mode 100644 libclc/clc/lib/generic/math/clc_sin.inc create mode 100644 libclc/clc/lib/generic/math/clc_sincos.cl rename libclc/{generic/lib/math/sincos.inc => clc/lib/generic/math/clc_sincos.inc} (62%) create mode 100644 libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc delete mode 100644 libclc/generic/include/clc/math/sincos.inc delete mode 100644 libclc/generic/lib/math/cos.inc delete mode 100644 libclc/generic/lib/math/sin.inc delete mode 100644 libclc/generic/lib/math/sincos_helpers.cl delete mode 100644 libclc/generic/lib/math/sincos_helpers.h delete mode 100644 libclc/generic/lib/math/tables.cl diff --git a/libclc/clc/include/clc/math/clc_cos.h b/libclc/clc/include/clc/math/clc_cos.h new file mode 100644 index 0000000000000..44681608efc37 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_cos.h @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_COS_H__ +#define __CLC_MATH_CLC_COS_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_cos + +#include <clc/math/gentype.inc> + +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_COS_H__ diff --git a/libclc/clc/include/clc/math/clc_sin.h b/libclc/clc/include/clc/math/clc_sin.h new file mode 100644 index 0000000000000..de4c722ca123f --- /dev/null +++ b/libclc/clc/include/clc/math/clc_sin.h @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_SIN_H__ +#define __CLC_MATH_CLC_SIN_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_sin + +#include <clc/math/gentype.inc> + +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_SIN_H__ diff --git a/libclc/clc/include/clc/math/clc_sincos.h b/libclc/clc/include/clc/math/clc_sincos.h new file mode 100644 index 0000000000000..e26dc7c024c9c --- /dev/null +++ b/libclc/clc/include/clc/math/clc_sincos.h @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_SINCOS_H__ +#define __CLC_MATH_CLC_SINCOS_H__ + +#define __CLC_BODY <clc/math/unary_decl_with_ptr.inc> +#define __CLC_FUNCTION __clc_sincos + +#include <clc/math/gentype.inc> + +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_SINCOS_H__ diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers.h b/libclc/clc/include/clc/math/clc_sincos_helpers.h index 8029d439f4c62..e3a2e1c0b605c 100644 --- a/libclc/clc/include/clc/math/clc_sincos_helpers.h +++ b/libclc/clc/include/clc/math/clc_sincos_helpers.h @@ -16,4 +16,11 @@ #undef __FLOAT_ONLY +#define __DOUBLE_ONLY +#define __CLC_BODY <clc/math/clc_sincos_helpers_fp64.inc> + +#include <clc/math/gentype.inc> + +#undef __DOUBLE_ONLY + #endif // __CLC_MATH_CLC_SINCOS_HELPERS_H__ diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc new file mode 100644 index 0000000000000..09c6e1c965f64 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +_CLC_DECL _CLC_OVERLOAD void +__clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, + private __CLC_DOUBLEN *rr, + private __CLC_INTN *regn); + +_CLC_DECL _CLC_OVERLOAD void +__clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, + private __CLC_DOUBLEN *rr, + private __CLC_INTN *regn); diff --git a/libclc/clc/include/clc/math/tables.h b/libclc/clc/include/clc/math/tables.h index fb172b0b8f221..0fec778b53679 100644 --- a/libclc/clc/include/clc/math/tables.h +++ b/libclc/clc/include/clc/math/tables.h @@ -61,7 +61,6 @@ TABLE_FUNCTION_DECL(float2, log2_tbl); TABLE_FUNCTION_DECL(float2, log10_tbl); -TABLE_FUNCTION_DECL(uint4, pibits_tbl); CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head); CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail); @@ -75,6 +74,7 @@ CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head); CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail); CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_head); CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail); +CLC_TABLE_FUNCTION_DECL(ulong, pibits_tbl); #ifdef cl_khr_fp64 diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 4240e7b08e7d1..4d66c749fc53e 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -32,6 +32,7 @@ math/clc_atanpi.cl math/clc_cbrt.cl math/clc_ceil.cl math/clc_copysign.cl +math/clc_cos.cl math/clc_cosh.cl math/clc_cospi.cl math/clc_ep_log.cl @@ -86,6 +87,8 @@ math/clc_rint.cl math/clc_rootn.cl math/clc_round.cl math/clc_rsqrt.cl +math/clc_sin.cl +math/clc_sincos.cl math/clc_sincos_helpers.cl math/clc_sinh.cl math/clc_sinpi.cl diff --git a/libclc/clc/lib/generic/math/clc_cos.cl b/libclc/clc/lib/generic/math/clc_cos.cl new file mode 100644 index 0000000000000..0c9dc287aa0b4 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_cos.cl @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/clcmacro.h> +#include <clc/float/definitions.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_sincos_helpers.h> +#include <clc/math/clc_sincos_piby4.h> +#include <clc/math/math.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/relational/clc_select.h> + +#define __CLC_BODY <clc_cos.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_cos.inc b/libclc/clc/lib/generic/math/clc_cos.inc new file mode 100644 index 0000000000000..4b8108c086090 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_cos.inc @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_cos(__CLC_FLOATN x) { + __CLC_FLOATN absx = __clc_fabs(x); + + __CLC_FLOATN r0, r1; + __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx); + + __CLC_FLOATN ss = -__clc_sinf_piby4(r0, r1); + __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1); + + __CLC_FLOATN c = (regn & 1) != 0 ? ss : cc; + c = __CLC_AS_FLOATN(__CLC_AS_INTN(c) ^ ((regn > 1) << 31)); + + c = __clc_select(c, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x)); + + return c; +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_cos(__CLC_CONVERT_FLOATN(x))); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) { + __CLC_GENTYPE absx = __clc_fabs(x); + + __CLC_BIT_INTN is_medium = absx < 0x1.0p+47; + + __CLC_INTN regn_m, regn_l; + __CLC_GENTYPE r_m, r_l, rr_m, rr_l; + + __clc_remainder_piby2_medium(absx, &r_m, &rr_m, ®n_m); + __clc_remainder_piby2_large(absx, &r_l, &rr_l, ®n_l); + + __CLC_GENTYPE r = is_medium ? r_m : r_l; + __CLC_GENTYPE rr = is_medium ? rr_m : rr_l; + __CLC_INTN regn = __CLC_CONVERT_INTN(is_medium) ? regn_m : regn_l; + + __CLC_GENTYPE sinval, cosval; + __clc_sincos_piby4(r, rr, &sinval, &cosval); + sinval = -sinval; + + __CLC_LONGN c = + __CLC_AS_LONGN(__CLC_CONVERT_BIT_INTN((regn & 1) != 0) ? sinval : cosval); + c ^= __CLC_CONVERT_BIT_INTN(regn > 1) << 63; + + return __clc_isnan(absx) | __clc_isinf(absx) ? __CLC_GENTYPE_NAN + : __CLC_AS_GENTYPE(c); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_sin.cl b/libclc/clc/lib/generic/math/clc_sin.cl new file mode 100644 index 0000000000000..0ff9739c6a846 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_sin.cl @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/clcmacro.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_sincos_helpers.h> +#include <clc/math/clc_sincos_piby4.h> +#include <clc/math/clc_trunc.h> +#include <clc/math/math.h> +#include <clc/math/tables.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/relational/clc_select.h> +#include <clc/shared/clc_max.h> + +#define __CLC_BODY <clc_sin.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_sin.inc b/libclc/clc/lib/generic/math/clc_sin.inc new file mode 100644 index 0000000000000..b4f72eb625eb0 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_sin.inc @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_sin(__CLC_FLOATN x) { + __CLC_FLOATN absx = __clc_fabs(x); + + __CLC_FLOATN r0, r1; + __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx); + + __CLC_FLOATN ss = __clc_sinf_piby4(r0, r1); + __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1); + + __CLC_FLOATN s = (regn & 1) != 0 ? cc : ss; + s = __CLC_AS_FLOATN(__CLC_AS_INTN(s) ^ ((regn > 1) << 31) ^ + (__CLC_AS_INTN(x) ^ __CLC_AS_INTN(absx))); + + s = __clc_select(s, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x)); + + // Subnormals + s = x == 0.0f ? x : s; + + return s; +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sin(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_sin(__CLC_CONVERT_FLOATN(x))); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sin(__CLC_GENTYPE x) { + __CLC_GENTYPE absx = __clc_fabs(x); + + __CLC_BIT_INTN is_medium = absx < 0x1.0p+47; + + __CLC_INTN regn_m, regn_l; + __CLC_GENTYPE r_m, r_l, rr_m, rr_l; + + __clc_remainder_piby2_medium(absx, &r_m, &rr_m, ®n_m); + __clc_remainder_piby2_large(absx, &r_l, &rr_l, ®n_l); + + __CLC_GENTYPE r = is_medium ? r_m : r_l; + __CLC_GENTYPE rr = is_medium ? rr_m : rr_l; + __CLC_INTN regn = __CLC_CONVERT_INTN(is_medium) ? regn_m : regn_l; + + __CLC_GENTYPE sinval, cosval; + __clc_sincos_piby4(r, rr, &sinval, &cosval); + + __CLC_LONGN s = + __CLC_AS_LONGN(__CLC_CONVERT_BIT_INTN((regn & 1) != 0) ? cosval : sinval); + + s ^= (__CLC_CONVERT_BIT_INTN(regn > 1) << 63) ^ + (__CLC_CONVERT_BIT_INTN(x < 0.0) << 63); + + return __clc_isinf(x) | __clc_isnan(x) ? __CLC_GENTYPE_NAN + : __CLC_AS_GENTYPE(s); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_sincos.cl b/libclc/clc/lib/generic/math/clc_sincos.cl new file mode 100644 index 0000000000000..2209a41593a2d --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_sincos.cl @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/internal/clc.h> +#include <clc/math/clc_cos.h> +#include <clc/math/clc_sin.h> + +#define __CLC_BODY <clc_sincos.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/clc/lib/generic/math/clc_sincos.inc similarity index 62% rename from libclc/generic/lib/math/sincos.inc rename to libclc/clc/lib/generic/math/clc_sincos.inc index b1d4f88c3d427..1e21b7549e448 100644 --- a/libclc/generic/lib/math/sincos.inc +++ b/libclc/clc/lib/generic/math/clc_sincos.inc @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ - _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \ - *cosval = cos(x); \ - return sin(x); \ +#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __clc_sincos(TYPE x, ADDRSPACE TYPE *cosval) { \ + *cosval = __clc_cos(x); \ + return __clc_sin(x); \ } __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl index 24676d3c7711c..c1768e39243fa 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl @@ -31,3 +31,27 @@ #define __CLC_BODY <clc_sincos_helpers.inc> #include <clc/math/gentype.inc> + +#undef __FLOAT_ONLY + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +#include <clc/math/clc_fract.h> +#include <clc/math/tables.h> +#include <clc/shared/clc_max.h> + +#define bytealign(src0, src1, src2) \ + (__CLC_CONVERT_UINTN( \ + ((__CLC_CONVERT_LONGN((src0)) << 32) | __CLC_CONVERT_LONGN((src1))) >> \ + (((src2) & 3) * 8))) + +#define __DOUBLE_ONLY +#define __CLC_BODY <clc_sincos_helpers_fp64.inc> + +#include <clc/math/gentype.inc> + +#undef __DOUBLE_ONLY + +#endif diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc new file mode 100644 index 0000000000000..3a8edb378cae2 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc @@ -0,0 +1,234 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Reduction for medium sized arguments +_CLC_DEF _CLC_OVERLOAD void +__clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, + private __CLC_DOUBLEN *rr, + private __CLC_INTN *regn) { + // How many pi/2 is x a multiple of? + const __CLC_DOUBLEN two_by_pi = 0x1.45f306dc9c883p-1; + __CLC_DOUBLEN dnpi2 = __clc_trunc(__clc_fma(x, two_by_pi, 0.5)); + + const __CLC_DOUBLEN piby2_h = -7074237752028440.0 / 0x1.0p+52; + const __CLC_DOUBLEN piby2_m = -2483878800010755.0 / 0x1.0p+105; + const __CLC_DOUBLEN piby2_t = -3956492004828932.0 / 0x1.0p+158; + + // Compute product of npi2 with 159 bits of 2/pi + __CLC_DOUBLEN p_hh = piby2_h * dnpi2; + __CLC_DOUBLEN p_ht = __clc_fma(piby2_h, dnpi2, -p_hh); + __CLC_DOUBLEN p_mh = piby2_m * dnpi2; + __CLC_DOUBLEN p_mt = __clc_fma(piby2_m, dnpi2, -p_mh); + __CLC_DOUBLEN p_th = piby2_t * dnpi2; + __CLC_DOUBLEN p_tt = __clc_fma(piby2_t, dnpi2, -p_th); + + // Reduce to 159 bits + __CLC_DOUBLEN ph = p_hh; + __CLC_DOUBLEN pm = p_ht + p_mh; + __CLC_DOUBLEN t = p_mh - (pm - p_ht); + __CLC_DOUBLEN pt = p_th + t + p_mt + p_tt; + t = ph + pm; + pm = pm - (t - ph); + ph = t; + t = pm + pt; + pt = pt - (t - pm); + pm = t; + + // Subtract from x + t = x + ph; + __CLC_DOUBLEN qh = t + pm; + __CLC_DOUBLEN qt = pm - (qh - t) + pt; + + *r = qh; + *rr = qt; + *regn = __CLC_CONVERT_INTN(__CLC_CONVERT_LONGN(dnpi2)) & 0x3; +} + +// Given positive argument x, reduce it to the range [-pi/4,pi/4] using +// extra precision, and return the result in r, rr. +// Return value "regn" tells how many lots of pi/2 were subtracted +// from x to put it in the range [-pi/4,pi/4], mod 4. +_CLC_DEF _CLC_OVERLOAD void +__clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, + private __CLC_DOUBLEN *rr, + private __CLC_INTN *regn) { + + __CLC_LONGN ux = __CLC_AS_LONGN(x); + __CLC_INTN e = __CLC_CONVERT_INTN(ux >> 52) - 1023; + __CLC_INTN i = __clc_max(23, (e >> 3) + 17); + __CLC_INTN j = 150 - i; + __CLC_INTN j16 = j & ~0xf; + __CLC_DOUBLEN fract_temp; + + // The following extracts 192 consecutive bits of 2/pi aligned on an arbitrary + // byte boundary + __CLC_ULONGN q0 = USE_TABLE(pibits_tbl, j16); + __CLC_ULONGN q3 = USE_TABLE(pibits_tbl, (j16 + 8)); + __CLC_ULONGN q1 = USE_TABLE(pibits_tbl, (j16 + 16)); + __CLC_ULONGN q4 = USE_TABLE(pibits_tbl, (j16 + 24)); + __CLC_ULONGN q2 = USE_TABLE(pibits_tbl, (j16 + 32)); + + __CLC_INTN k = (j >> 2) & 0x3; + __CLC_INTN c0 = k == 0; + __CLC_INTN c1 = k == 1; + __CLC_INTN c2 = k == 2; + __CLC_INTN c3 = k == 3; + + __CLC_UINTN u0, u1, u2, u3, u4, u5, u6; + + __CLC_UINTN q0s0 = __CLC_CONVERT_UINTN(q0); + __CLC_UINTN q0s1 = __CLC_CONVERT_UINTN(q0 >> 32); + + __CLC_UINTN q1s0 = __CLC_CONVERT_UINTN(q1); + __CLC_UINTN q1s1 = __CLC_CONVERT_UINTN(q1 >> 32); + + __CLC_UINTN q2s0 = __CLC_CONVERT_UINTN(q2); + __CLC_UINTN q2s1 = __CLC_CONVERT_UINTN(q2 >> 32); + + __CLC_UINTN q3s0 = __CLC_CONVERT_UINTN(q3); + __CLC_UINTN q3s1 = __CLC_CONVERT_UINTN(q3 >> 32); + + __CLC_UINTN q4s0 = __CLC_CONVERT_UINTN(q4); + __CLC_UINTN q4s1 = __CLC_CONVERT_UINTN(q4 >> 32); + + u0 = c1 ? q0s1 : q0s0; + u0 = c2 ? q3s0 : u0; + u0 = c3 ? q3s1 : u0; + + u1 = c1 ? q3s0 : q0s1; + u1 = c2 ? q3s1 : u1; + u1 = c3 ? q1s0 : u1; + + u2 = c1 ? q3s1 : q3s0; + u2 = c2 ? q1s0 : u2; + u2 = c3 ? q1s1 : u2; + + u3 = c1 ? q1s0 : q3s1; + u3 = c2 ? q1s1 : u3; + u3 = c3 ? q4s0 : u3; + + u4 = c1 ? q1s1 : q1s0; + u4 = c2 ? q4s0 : u4; + u4 = c3 ? q4s1 : u4; + + u5 = c1 ? q4s0 : q1s1; + u5 = c2 ? q4s1 : u5; + u5 = c3 ? q2s0 : u5; + + u6 = c1 ? q4s1 : q4s0; + u6 = c2 ? q2s0 : u6; + u6 = c3 ? q2s1 : u6; + + __CLC_UINTN v0 = bytealign(u1, u0, j); + __CLC_UINTN v1 = bytealign(u2, u1, j); + __CLC_UINTN v2 = bytealign(u3, u2, j); + __CLC_UINTN v3 = bytealign(u4, u3, j); + __CLC_UINTN v4 = bytealign(u5, u4, j); + __CLC_UINTN v5 = bytealign(u6, u5, j); + + // Place those 192 bits in 4 48-bit doubles along with correct exponent + // If i > 1018 we would get subnormals so we scale p up and x down to get the + // same product + i = 2 + 8 * i; + x *= __CLC_CONVERT_BIT_INTN(i > 1018) ? 0x1.0p-136 : 1.0; + i -= i > 1018 ? 136 : 0; + +#define doublen_lohi(x, y) \ + __CLC_AS_DOUBLEN(__CLC_CONVERT_ULONGN((x)) & 0xFFFFFFFF | \ + __CLC_CONVERT_ULONGN((y)) << 32) + + __CLC_UINTN ua = __CLC_CONVERT_UINTN(1023 + 52 - i) << 20; + __CLC_DOUBLEN a = doublen_lohi((__CLC_ULONGN)0, ua); + __CLC_DOUBLEN p0 = doublen_lohi(v0, ua | (v1 & 0xffffU)) - a; + ua += 0x03000000U; + a = doublen_lohi((__CLC_ULONGN)0, ua); + __CLC_DOUBLEN p1 = + doublen_lohi(((v2 << 16) | (v1 >> 16)), (ua | (v2 >> 16))) - a; + ua += 0x03000000U; + a = doublen_lohi((__CLC_ULONGN)0, ua); + __CLC_DOUBLEN p2 = doublen_lohi(v3, (ua | (v4 & 0xffffU))) - a; + ua += 0x03000000U; + a = doublen_lohi((__CLC_ULONGN)0, ua); + __CLC_DOUBLEN p3 = + doublen_lohi(((v5 << 16) | (v4 >> 16)), (ua | (v5 >> 16))) - a; + +#undef doublen_lohi + + // Exact multiply + __CLC_DOUBLEN f0h = p0 * x; + __CLC_DOUBLEN f0l = __clc_fma(p0, x, -f0h); + __CLC_DOUBLEN f1h = p1 * x; + __CLC_DOUBLEN f1l = __clc_fma(p1, x, -f1h); + __CLC_DOUBLEN f2h = p2 * x; + __CLC_DOUBLEN f2l = __clc_fma(p2, x, -f2h); + __CLC_DOUBLEN f3h = p3 * x; + __CLC_DOUBLEN f3l = __clc_fma(p3, x, -f3h); + + // Accumulate product into 4 doubles + __CLC_DOUBLEN s, t; + + __CLC_DOUBLEN f3 = f3h + f2h; + t = f2h - (f3 - f3h); + s = f3l + t; + t = t - (s - f3l); + + __CLC_DOUBLEN f2 = s + f1h; + t = f1h - (f2 - s) + t; + s = f2l + t; + t = t - (s - f2l); + + __CLC_DOUBLEN f1 = s + f0h; + t = f0h - (f1 - s) + t; + s = f1l + t; + + __CLC_DOUBLEN f0 = s + f0l; + + // Strip off unwanted large integer bits + f3 = 0x1.0p+10 * __clc_fract(f3 * 0x1.0p-10, &fract_temp); + f3 += f3 + f2 < 0.0 ? 0x1.0p+10 : 0.0; + + // Compute least significant integer bits + t = f3 + f2; + __CLC_DOUBLEN di = t - __clc_fract(t, &fract_temp); + i = __CLC_CONVERT_INTN(__CLC_CONVERT_FLOATN(di)); + + // Shift out remaining integer part + f3 -= di; + s = f3 + f2; + t = f2 - (s - f3); + f3 = s; + f2 = t; + s = f2 + f1; + t = f1 - (s - f2); + f2 = s; + f1 = t; + f1 += f0; + + // Subtract 1 if fraction is >= 0.5, and update regn + __CLC_INTN g = __CLC_CONVERT_INTN(f3 >= 0.5 ? 1L : 0L); + i += g; + f3 -= __CLC_CONVERT_DOUBLEN(__CLC_CONVERT_FLOATN(g)); + + // Shift up bits + s = f3 + f2; + t = f2 - (s - f3); + f3 = s; + f2 = t + f1; + + // Multiply precise fraction by pi/2 to get radians + const __CLC_DOUBLEN p2h = 7074237752028440.0 / 0x1.0p+52; + const __CLC_DOUBLEN p2t = 4967757600021510.0 / 0x1.0p+106; + + __CLC_DOUBLEN rhi = f3 * p2h; + __CLC_DOUBLEN rlo = + __clc_fma(f2, p2h, __clc_fma(f3, p2t, __clc_fma(f3, p2h, -rhi))); + + *r = rhi + rlo; + *rr = rlo - (*r - rhi); + *regn = i & 0x3; +} diff --git a/libclc/clc/lib/generic/math/clc_tables.cl b/libclc/clc/lib/generic/math/clc_tables.cl index 7ee744589ae1d..05053aea7bc12 100644 --- a/libclc/clc/lib/generic/math/clc_tables.cl +++ b/libclc/clc/lib/generic/math/clc_tables.cl @@ -1648,4 +1648,66 @@ CLC_TABLE_FUNCTION(double, SINH_TBL_TAIL, sinh_tbl_tail); CLC_TABLE_FUNCTION(double, COSH_TBL_HEAD, cosh_tbl_head); CLC_TABLE_FUNCTION(double, COSH_TBL_TAIL, cosh_tbl_tail); +DECLARE_TABLE(uchar, PIBITS_TBL, ) = { + 224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175, + 169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31, + 235, 158, 212, 181, 168, 127, 121, 154, 253, 24, 61, 221, 38, + 44, 159, 60, 251, 217, 180, 125, 180, 41, 104, 45, 70, 188, + 188, 63, 96, 22, 120, 255, 95, 226, 127, 236, 160, 228, 247, + 46, 126, 17, 114, 210, 231, 76, 13, 230, 88, 71, 230, 4, 249, + 125, 209, 154, 192, 113, 166, 19, 18, 237, 186, 212, 215, 8, + 162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, 39, + 168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21, + 239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109, + 3, 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13, + 230, 139, 2, 0, 0, 0, 0, 0, 0, 0 +}; + +_CLC_DEF _CLC_OVERLOAD ulong TABLE_MANGLE(pibits_tbl)(int idx) { + return *(__constant ulong *)(PIBITS_TBL + idx); +} +_CLC_DEF _CLC_OVERLOAD ulong2 TABLE_MANGLE(pibits_tbl)(int2 idx) { + return (ulong2){*(__constant ulong *)(PIBITS_TBL + idx.s0), + *(__constant ulong *)(PIBITS_TBL + idx.s1)}; +} +_CLC_DEF _CLC_OVERLOAD ulong3 TABLE_MANGLE(pibits_tbl)(int3 idx) { + return (ulong3){*(__constant ulong *)(PIBITS_TBL + idx.s0), + *(__constant ulong *)(PIBITS_TBL + idx.s1), + *(__constant ulong *)(PIBITS_TBL + idx.s2)}; +} +_CLC_DEF _CLC_OVERLOAD ulong4 TABLE_MANGLE(pibits_tbl)(int4 idx) { + return (ulong4){*(__constant ulong *)(PIBITS_TBL + idx.s0), + *(__constant ulong *)(PIBITS_TBL + idx.s1), + *(__constant ulong *)(PIBITS_TBL + idx.s2), + *(__constant ulong *)(PIBITS_TBL + idx.s3)}; +} +_CLC_DEF _CLC_OVERLOAD ulong8 TABLE_MANGLE(pibits_tbl)(int8 idx) { + return (ulong8){*(__constant ulong *)(PIBITS_TBL + idx.s0), + *(__constant ulong *)(PIBITS_TBL + idx.s1), + *(__constant ulong *)(PIBITS_TBL + idx.s2), + *(__constant ulong *)(PIBITS_TBL + idx.s3), + *(__constant ulong *)(PIBITS_TBL + idx.s4), + *(__constant ulong *)(PIBITS_TBL + idx.s5), + *(__constant ulong *)(PIBITS_TBL + idx.s6), + *(__constant ulong *)(PIBITS_TBL + idx.s7)}; +} +_CLC_DEF _CLC_OVERLOAD ulong16 TABLE_MANGLE(pibits_tbl)(int16 idx) { + return (ulong16){*(__constant ulong *)(PIBITS_TBL + idx.s0), + *(__constant ulong *)(PIBITS_TBL + idx.s1), + *(__constant ulong *)(PIBITS_TBL + idx.s2), + *(__constant ulong *)(PIBITS_TBL + idx.s3), + *(__constant ulong *)(PIBITS_TBL + idx.s4), + *(__constant ulong *)(PIBITS_TBL + idx.s5), + *(__constant ulong *)(PIBITS_TBL + idx.s6), + *(__constant ulong *)(PIBITS_TBL + idx.s7), + *(__constant ulong *)(PIBITS_TBL + idx.s8), + *(__constant ulong *)(PIBITS_TBL + idx.s9), + *(__constant ulong *)(PIBITS_TBL + idx.sA), + *(__constant ulong *)(PIBITS_TBL + idx.sB), + *(__constant ulong *)(PIBITS_TBL + idx.sC), + *(__constant ulong *)(PIBITS_TBL + idx.sD), + *(__constant ulong *)(PIBITS_TBL + idx.sE), + *(__constant ulong *)(PIBITS_TBL + idx.sF)}; +} + #endif // cl_khr_fp64 diff --git a/libclc/clspv/lib/SOURCES b/libclc/clspv/lib/SOURCES index d2fea9d586287..437210d1922da 100644 --- a/libclc/clspv/lib/SOURCES +++ b/libclc/clspv/lib/SOURCES @@ -66,10 +66,8 @@ subnormal_config.cl ../../generic/lib/math/rootn.cl ../../generic/lib/math/sin.cl ../../generic/lib/math/sincos.cl -../../generic/lib/math/sincos_helpers.cl ../../generic/lib/math/sinh.cl ../../generic/lib/math/sinpi.cl -../../generic/lib/math/tables.cl ../../generic/lib/math/tan.cl ../../generic/lib/math/tanh.cl ../../generic/lib/math/tanpi.cl diff --git a/libclc/generic/include/clc/math/sincos.h b/libclc/generic/include/clc/math/sincos.h index fbd29c441e319..7426c23e88af9 100644 --- a/libclc/generic/include/clc/math/sincos.h +++ b/libclc/generic/include/clc/math/sincos.h @@ -6,5 +6,7 @@ // //===----------------------------------------------------------------------===// -#define __CLC_BODY <clc/math/sincos.inc> +#define __CLC_BODY <clc/math/unary_decl_with_ptr.inc> +#define __CLC_FUNCTION __clc_sincos #include <clc/math/gentype.inc> +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/math/sincos.inc b/libclc/generic/include/clc/math/sincos.inc deleted file mode 100644 index d6ec2fe94704e..0000000000000 --- a/libclc/generic/include/clc/math/sincos.inc +++ /dev/null @@ -1,14 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos(__CLC_GENTYPE x, - global __CLC_GENTYPE *cosval); -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos(__CLC_GENTYPE x, - local __CLC_GENTYPE *cosval); -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos(__CLC_GENTYPE x, - private __CLC_GENTYPE *cosval); diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 781aaf16a5244..8c7565e3dd231 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -131,7 +131,6 @@ math/native_rsqrt.cl math/native_sin.cl math/native_sqrt.cl math/native_tan.cl -math/tables.cl math/nextafter.cl math/pow.cl math/pown.cl @@ -144,7 +143,6 @@ math/round.cl math/rsqrt.cl math/sin.cl math/sincos.cl -math/sincos_helpers.cl math/sinh.cl math/sinpi.cl math/sqrt.cl diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl index 7e28e9ffed3b6..ce51c1031fa45 100644 --- a/libclc/generic/lib/math/clc_tan.cl +++ b/libclc/generic/lib/math/clc_tan.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "sincos_helpers.h" #include <clc/clc.h> #include <clc/clcmacro.h> #include <clc/math/clc_fabs.h> diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl index 00ffa371ad8fc..5b97c6a6f379a 100644 --- a/libclc/generic/lib/math/cos.cl +++ b/libclc/generic/lib/math/cos.cl @@ -6,45 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "sincos_helpers.h" #include <clc/clc.h> -#include <clc/clc_convert.h> -#include <clc/clcmacro.h> -#include <clc/math/clc_fabs.h> -#include <clc/math/clc_sincos_helpers.h> -#include <clc/math/math.h> -#include <clc/relational/clc_isinf.h> -#include <clc/relational/clc_isnan.h> -#include <clc/relational/clc_select.h> +#include <clc/math/clc_cos.h> -// FP32 and FP16 versions. -#define __CLC_BODY <cos.inc> +#define FUNCTION cos +#define __CLC_BODY <clc/shared/unary_def.inc> #include <clc/math/gentype.inc> - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double cos(double x) { - x = fabs(x); - - double r, rr; - int regn; - - if (x < 0x1.0p+47) - __clc_remainder_piby2_medium(x, &r, &rr, ®n); - else - __clc_remainder_piby2_large(x, &r, &rr, ®n); - - double2 sc = __clc_sincos_piby4(r, rr); - sc.lo = -sc.lo; - - int2 c = as_int2(regn & 1 ? sc.lo : sc.hi); - c.hi ^= (regn > 1) << 31; - - return isnan(x) | isinf(x) ? as_double(QNANBITPATT_DP64) : as_double(c); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double); - -#endif diff --git a/libclc/generic/lib/math/cos.inc b/libclc/generic/lib/math/cos.inc deleted file mode 100644 index 1db96710457dd..0000000000000 --- a/libclc/generic/lib/math/cos.inc +++ /dev/null @@ -1,34 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 32 - -_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN cos(__CLC_FLOATN x) { - __CLC_FLOATN absx = __clc_fabs(x); - - __CLC_FLOATN r0, r1; - __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx); - - __CLC_FLOATN ss = -__clc_sinf_piby4(r0, r1); - __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1); - - __CLC_FLOATN c = (regn & 1) != 0 ? ss : cc; - c = __CLC_AS_FLOATN(__CLC_AS_INTN(c) ^ ((regn > 1) << 31)); - - c = __clc_select(c, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x)); - - return c; -} - -#elif __CLC_FPSIZE == 16 - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE cos(__CLC_GENTYPE x) { - return __CLC_CONVERT_GENTYPE(cos(__CLC_CONVERT_FLOATN(x))); -} - -#endif diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl index f776805ad1cdb..76728cfb1c5e5 100644 --- a/libclc/generic/lib/math/sin.cl +++ b/libclc/generic/lib/math/sin.cl @@ -6,44 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "sincos_helpers.h" #include <clc/clc.h> -#include <clc/clc_convert.h> -#include <clc/clcmacro.h> -#include <clc/math/clc_fabs.h> -#include <clc/math/clc_sincos_helpers.h> -#include <clc/math/math.h> -#include <clc/relational/clc_isinf.h> -#include <clc/relational/clc_isnan.h> -#include <clc/relational/clc_select.h> +#include <clc/math/clc_sin.h> -// FP32 and FP16 versions. -#define __CLC_BODY <sin.inc> +#define FUNCTION sin +#define __CLC_BODY <clc/shared/unary_def.inc> #include <clc/math/gentype.inc> - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double sin(double x) { - double y = fabs(x); - - double r, rr; - int regn; - - if (y < 0x1.0p+47) - __clc_remainder_piby2_medium(y, &r, &rr, ®n); - else - __clc_remainder_piby2_large(y, &r, &rr, ®n); - - double2 sc = __clc_sincos_piby4(r, rr); - - int2 s = as_int2(regn & 1 ? sc.hi : sc.lo); - s.hi ^= ((regn > 1) << 31) ^ ((x < 0.0) << 31); - - return isinf(x) | isnan(x) ? as_double(QNANBITPATT_DP64) : as_double(s); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double); - -#endif diff --git a/libclc/generic/lib/math/sin.inc b/libclc/generic/lib/math/sin.inc deleted file mode 100644 index dbc99116dfa7d..0000000000000 --- a/libclc/generic/lib/math/sin.inc +++ /dev/null @@ -1,38 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 32 - -_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN sin(__CLC_FLOATN x) { - __CLC_FLOATN absx = __clc_fabs(x); - - __CLC_FLOATN r0, r1; - __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx); - - __CLC_FLOATN ss = __clc_sinf_piby4(r0, r1); - __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1); - - __CLC_FLOATN s = (regn & 1) != 0 ? cc : ss; - s = __CLC_AS_FLOATN(__CLC_AS_INTN(s) ^ ((regn > 1) << 31) ^ - (__CLC_AS_INTN(x) ^ __CLC_AS_INTN(absx))); - - s = __clc_select(s, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x)); - - // Subnormals - s = x == 0.0f ? x : s; - - return s; -} - -#elif __CLC_FPSIZE == 16 - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE sin(__CLC_GENTYPE x) { - return __CLC_CONVERT_GENTYPE(sin(__CLC_CONVERT_FLOATN(x))); -} - -#endif diff --git a/libclc/generic/lib/math/sincos.cl b/libclc/generic/lib/math/sincos.cl index fc0e4d350c122..25e620cf2a38e 100644 --- a/libclc/generic/lib/math/sincos.cl +++ b/libclc/generic/lib/math/sincos.cl @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> +#include <clc/math/clc_sincos.h> -#define __CLC_BODY <sincos.inc> +#define FUNCTION sincos +#define __CLC_BODY <clc/math/unary_def_with_ptr.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl deleted file mode 100644 index 651cd11ccf016..0000000000000 --- a/libclc/generic/lib/math/sincos_helpers.cl +++ /dev/null @@ -1,285 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "sincos_helpers.h" -#include <clc/clc.h> -#include <clc/integer/clc_clz.h> -#include <clc/integer/clc_mul_hi.h> -#include <clc/math/clc_fma.h> -#include <clc/math/clc_mad.h> -#include <clc/math/clc_trunc.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> -#include <clc/shared/clc_max.h> - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -#define bytealign(src0, src1, src2) \ - ((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8))) - -// Reduction for medium sized arguments -_CLC_DEF void __clc_remainder_piby2_medium(double x, private double *r, - private double *rr, - private int *regn) { - // How many pi/2 is x a multiple of? - const double two_by_pi = 0x1.45f306dc9c883p-1; - double dnpi2 = __clc_trunc(__clc_fma(x, two_by_pi, 0.5)); - - const double piby2_h = -7074237752028440.0 / 0x1.0p+52; - const double piby2_m = -2483878800010755.0 / 0x1.0p+105; - const double piby2_t = -3956492004828932.0 / 0x1.0p+158; - - // Compute product of npi2 with 159 bits of 2/pi - double p_hh = piby2_h * dnpi2; - double p_ht = __clc_fma(piby2_h, dnpi2, -p_hh); - double p_mh = piby2_m * dnpi2; - double p_mt = __clc_fma(piby2_m, dnpi2, -p_mh); - double p_th = piby2_t * dnpi2; - double p_tt = __clc_fma(piby2_t, dnpi2, -p_th); - - // Reduce to 159 bits - double ph = p_hh; - double pm = p_ht + p_mh; - double t = p_mh - (pm - p_ht); - double pt = p_th + t + p_mt + p_tt; - t = ph + pm; - pm = pm - (t - ph); - ph = t; - t = pm + pt; - pt = pt - (t - pm); - pm = t; - - // Subtract from x - t = x + ph; - double qh = t + pm; - double qt = pm - (qh - t) + pt; - - *r = qh; - *rr = qt; - *regn = (int)(long)dnpi2 & 0x3; -} - -// Given positive argument x, reduce it to the range [-pi/4,pi/4] using -// extra precision, and return the result in r, rr. -// Return value "regn" tells how many lots of pi/2 were subtracted -// from x to put it in the range [-pi/4,pi/4], mod 4. - -_CLC_DEF void __clc_remainder_piby2_large(double x, private double *r, - private double *rr, - private int *regn) { - - long ux = as_long(x); - int e = (int)(ux >> 52) - 1023; - int i = __clc_max(23, (e >> 3) + 17); - int j = 150 - i; - int j16 = j & ~0xf; - double fract_temp; - - // The following extracts 192 consecutive bits of 2/pi aligned on an arbitrary - // byte boundary - uint4 q0 = USE_TABLE(pibits_tbl, j16); - uint4 q1 = USE_TABLE(pibits_tbl, (j16 + 16)); - uint4 q2 = USE_TABLE(pibits_tbl, (j16 + 32)); - - int k = (j >> 2) & 0x3; - int4 c = (int4)k == (int4)(0, 1, 2, 3); - - uint u0, u1, u2, u3, u4, u5, u6; - - u0 = c.s1 ? q0.s1 : q0.s0; - u0 = c.s2 ? q0.s2 : u0; - u0 = c.s3 ? q0.s3 : u0; - - u1 = c.s1 ? q0.s2 : q0.s1; - u1 = c.s2 ? q0.s3 : u1; - u1 = c.s3 ? q1.s0 : u1; - - u2 = c.s1 ? q0.s3 : q0.s2; - u2 = c.s2 ? q1.s0 : u2; - u2 = c.s3 ? q1.s1 : u2; - - u3 = c.s1 ? q1.s0 : q0.s3; - u3 = c.s2 ? q1.s1 : u3; - u3 = c.s3 ? q1.s2 : u3; - - u4 = c.s1 ? q1.s1 : q1.s0; - u4 = c.s2 ? q1.s2 : u4; - u4 = c.s3 ? q1.s3 : u4; - - u5 = c.s1 ? q1.s2 : q1.s1; - u5 = c.s2 ? q1.s3 : u5; - u5 = c.s3 ? q2.s0 : u5; - - u6 = c.s1 ? q1.s3 : q1.s2; - u6 = c.s2 ? q2.s0 : u6; - u6 = c.s3 ? q2.s1 : u6; - - uint v0 = bytealign(u1, u0, j); - uint v1 = bytealign(u2, u1, j); - uint v2 = bytealign(u3, u2, j); - uint v3 = bytealign(u4, u3, j); - uint v4 = bytealign(u5, u4, j); - uint v5 = bytealign(u6, u5, j); - - // Place those 192 bits in 4 48-bit doubles along with correct exponent - // If i > 1018 we would get subnormals so we scale p up and x down to get the - // same product - i = 2 + 8 * i; - x *= i > 1018 ? 0x1.0p-136 : 1.0; - i -= i > 1018 ? 136 : 0; - - uint ua = (uint)(1023 + 52 - i) << 20; - double a = as_double((uint2)(0, ua)); - double p0 = as_double((uint2)(v0, ua | (v1 & 0xffffU))) - a; - ua += 0x03000000U; - a = as_double((uint2)(0, ua)); - double p1 = as_double((uint2)((v2 << 16) | (v1 >> 16), ua | (v2 >> 16))) - a; - ua += 0x03000000U; - a = as_double((uint2)(0, ua)); - double p2 = as_double((uint2)(v3, ua | (v4 & 0xffffU))) - a; - ua += 0x03000000U; - a = as_double((uint2)(0, ua)); - double p3 = as_double((uint2)((v5 << 16) | (v4 >> 16), ua | (v5 >> 16))) - a; - - // Exact multiply - double f0h = p0 * x; - double f0l = __clc_fma(p0, x, -f0h); - double f1h = p1 * x; - double f1l = __clc_fma(p1, x, -f1h); - double f2h = p2 * x; - double f2l = __clc_fma(p2, x, -f2h); - double f3h = p3 * x; - double f3l = __clc_fma(p3, x, -f3h); - - // Accumulate product into 4 doubles - double s, t; - - double f3 = f3h + f2h; - t = f2h - (f3 - f3h); - s = f3l + t; - t = t - (s - f3l); - - double f2 = s + f1h; - t = f1h - (f2 - s) + t; - s = f2l + t; - t = t - (s - f2l); - - double f1 = s + f0h; - t = f0h - (f1 - s) + t; - s = f1l + t; - - double f0 = s + f0l; - - // Strip off unwanted large integer bits - f3 = 0x1.0p+10 * fract(f3 * 0x1.0p-10, &fract_temp); - f3 += f3 + f2 < 0.0 ? 0x1.0p+10 : 0.0; - - // Compute least significant integer bits - t = f3 + f2; - double di = t - fract(t, &fract_temp); - i = (float)di; - - // Shift out remaining integer part - f3 -= di; - s = f3 + f2; - t = f2 - (s - f3); - f3 = s; - f2 = t; - s = f2 + f1; - t = f1 - (s - f2); - f2 = s; - f1 = t; - f1 += f0; - - // Subtract 1 if fraction is >= 0.5, and update regn - int g = f3 >= 0.5; - i += g; - f3 -= (float)g; - - // Shift up bits - s = f3 + f2; - t = f2 - (s - f3); - f3 = s; - f2 = t + f1; - - // Multiply precise fraction by pi/2 to get radians - const double p2h = 7074237752028440.0 / 0x1.0p+52; - const double p2t = 4967757600021510.0 / 0x1.0p+106; - - double rhi = f3 * p2h; - double rlo = __clc_fma(f2, p2h, __clc_fma(f3, p2t, __clc_fma(f3, p2h, -rhi))); - - *r = rhi + rlo; - *rr = rlo - (*r - rhi); - *regn = i & 0x3; -} - -_CLC_DEF double2 __clc_sincos_piby4(double x, double xx) { - // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... - // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... - // = x * f(w) - // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... - // We use a minimax approximation of (f(w) - 1) / w - // because this produces an expansion in even powers of x. - // If xx (the tail of x) is non-zero, we add a correction - // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx) - // is an approximation to cos(x)*sin(xx) valid because - // xx is tiny relative to x. - - // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... - // = f(w) - // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... - // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) - // because this produces an expansion in even powers of x. - // If xx (the tail of x) is non-zero, we subtract a correction - // term g(x,xx) = x*xx to the result, where g(x,xx) - // is an approximation to sin(x)*sin(xx) valid because - // xx is tiny relative to x. - - const double sc1 = -0.166666666666666646259241729; - const double sc2 = 0.833333333333095043065222816e-2; - const double sc3 = -0.19841269836761125688538679e-3; - const double sc4 = 0.275573161037288022676895908448e-5; - const double sc5 = -0.25051132068021699772257377197e-7; - const double sc6 = 0.159181443044859136852668200e-9; - - const double cc1 = 0.41666666666666665390037e-1; - const double cc2 = -0.13888888888887398280412e-2; - const double cc3 = 0.248015872987670414957399e-4; - const double cc4 = -0.275573172723441909470836e-6; - const double cc5 = 0.208761463822329611076335e-8; - const double cc6 = -0.113826398067944859590880e-10; - - double x2 = x * x; - double x3 = x2 * x; - double r = 0.5 * x2; - double t = 1.0 - r; - - double sp = __clc_fma( - __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2); - - double cp = - t + - __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5), - x2, cc4), - x2, cc3), - x2, cc2), - x2, cc1), - x2 * x2, __clc_fma(x, xx, (1.0 - t) - r)); - - double2 ret; - ret.lo = - x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx)); - ret.hi = cp; - - return ret; -} - -#endif diff --git a/libclc/generic/lib/math/sincos_helpers.h b/libclc/generic/lib/math/sincos_helpers.h deleted file mode 100644 index 11cb93f34850d..0000000000000 --- a/libclc/generic/lib/math/sincos_helpers.h +++ /dev/null @@ -1,24 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/clcfunc.h> -#include <clc/clctypes.h> - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DECL void __clc_remainder_piby2_medium(double x, private double *r, - private double *rr, - private int *regn); -_CLC_DECL void __clc_remainder_piby2_large(double x, private double *r, - private double *rr, - private int *regn); -_CLC_DECL double2 __clc_sincos_piby4(double x, double xx); - -#endif diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl deleted file mode 100644 index 1bda480a9fa72..0000000000000 --- a/libclc/generic/lib/math/tables.cl +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/clc.h> - -#include <clc/math/tables.h> - -DECLARE_TABLE(uchar, PIBITS_TBL, ) = { - 224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175, - 169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31, - 235, 158, 212, 181, 168, 127, 121, 154, 253, 24, 61, 221, 38, - 44, 159, 60, 251, 217, 180, 125, 180, 41, 104, 45, 70, 188, - 188, 63, 96, 22, 120, 255, 95, 226, 127, 236, 160, 228, 247, - 46, 126, 17, 114, 210, 231, 76, 13, 230, 88, 71, 230, 4, 249, - 125, 209, 154, 192, 113, 166, 19, 18, 237, 186, 212, 215, 8, - 162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, 39, - 168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21, - 239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109, - 3, 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13, - 230, 139, 2, 0, 0, 0, 0, 0, 0, 0 -}; - -uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) { - return *(__constant uint4 *)(PIBITS_TBL + idx); -} diff --git a/libclc/spirv/lib/SOURCES b/libclc/spirv/lib/SOURCES index 5446fe13a6d93..20e052253d977 100644 --- a/libclc/spirv/lib/SOURCES +++ b/libclc/spirv/lib/SOURCES @@ -55,7 +55,6 @@ math/fma.cl ../../generic/lib/math/log2.cl ../../generic/lib/math/logb.cl ../../generic/lib/math/modf.cl -../../generic/lib/math/tables.cl ../../generic/lib/math/pow.cl ../../generic/lib/math/pown.cl ../../generic/lib/math/powr.cl @@ -64,7 +63,6 @@ math/fma.cl ../../generic/lib/math/rootn.cl ../../generic/lib/math/sin.cl ../../generic/lib/math/sincos.cl -../../generic/lib/math/sincos_helpers.cl ../../generic/lib/math/sinh.cl ../../generic/lib/math/sinpi.cl ../../generic/lib/math/clc_tan.cl >From 212ae34f61507cc71dcd20f21af7a02e8e62593a Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Thu, 1 May 2025 17:48:20 +0100 Subject: [PATCH 2/3] update table vars --- .../generic/math/clc_sincos_helpers_fp64.inc | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc index 3a8edb378cae2..9b5776d9b05e9 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc @@ -59,7 +59,7 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, private __CLC_INTN *regn) { __CLC_LONGN ux = __CLC_AS_LONGN(x); - __CLC_INTN e = __CLC_CONVERT_INTN(ux >> 52) - 1023; + __CLC_INTN e = __CLC_CONVERT_INTN(ux >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; __CLC_INTN i = __clc_max(23, (e >> 3) + 17); __CLC_INTN j = 150 - i; __CLC_INTN j16 = j & ~0xf; @@ -68,18 +68,10 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, // The following extracts 192 consecutive bits of 2/pi aligned on an arbitrary // byte boundary __CLC_ULONGN q0 = USE_TABLE(pibits_tbl, j16); - __CLC_ULONGN q3 = USE_TABLE(pibits_tbl, (j16 + 8)); - __CLC_ULONGN q1 = USE_TABLE(pibits_tbl, (j16 + 16)); - __CLC_ULONGN q4 = USE_TABLE(pibits_tbl, (j16 + 24)); - __CLC_ULONGN q2 = USE_TABLE(pibits_tbl, (j16 + 32)); - - __CLC_INTN k = (j >> 2) & 0x3; - __CLC_INTN c0 = k == 0; - __CLC_INTN c1 = k == 1; - __CLC_INTN c2 = k == 2; - __CLC_INTN c3 = k == 3; - - __CLC_UINTN u0, u1, u2, u3, u4, u5, u6; + __CLC_ULONGN q1 = USE_TABLE(pibits_tbl, (j16 + 8)); + __CLC_ULONGN q2 = USE_TABLE(pibits_tbl, (j16 + 16)); + __CLC_ULONGN q3 = USE_TABLE(pibits_tbl, (j16 + 24)); + __CLC_ULONGN q4 = USE_TABLE(pibits_tbl, (j16 + 32)); __CLC_UINTN q0s0 = __CLC_CONVERT_UINTN(q0); __CLC_UINTN q0s1 = __CLC_CONVERT_UINTN(q0 >> 32); @@ -96,33 +88,41 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, __CLC_UINTN q4s0 = __CLC_CONVERT_UINTN(q4); __CLC_UINTN q4s1 = __CLC_CONVERT_UINTN(q4 >> 32); + __CLC_INTN k = (j >> 2) & 0x3; + __CLC_INTN c0 = k == 0; + __CLC_INTN c1 = k == 1; + __CLC_INTN c2 = k == 2; + __CLC_INTN c3 = k == 3; + + __CLC_UINTN u0, u1, u2, u3, u4, u5, u6; + u0 = c1 ? q0s1 : q0s0; - u0 = c2 ? q3s0 : u0; - u0 = c3 ? q3s1 : u0; + u0 = c2 ? q1s0 : u0; + u0 = c3 ? q1s1 : u0; - u1 = c1 ? q3s0 : q0s1; - u1 = c2 ? q3s1 : u1; - u1 = c3 ? q1s0 : u1; + u1 = c1 ? q1s0 : q0s1; + u1 = c2 ? q1s1 : u1; + u1 = c3 ? q2s0 : u1; - u2 = c1 ? q3s1 : q3s0; - u2 = c2 ? q1s0 : u2; - u2 = c3 ? q1s1 : u2; + u2 = c1 ? q1s1 : q1s0; + u2 = c2 ? q2s0 : u2; + u2 = c3 ? q2s1 : u2; - u3 = c1 ? q1s0 : q3s1; - u3 = c2 ? q1s1 : u3; - u3 = c3 ? q4s0 : u3; + u3 = c1 ? q2s0 : q1s1; + u3 = c2 ? q2s1 : u3; + u3 = c3 ? q3s0 : u3; - u4 = c1 ? q1s1 : q1s0; - u4 = c2 ? q4s0 : u4; - u4 = c3 ? q4s1 : u4; + u4 = c1 ? q2s1 : q2s0; + u4 = c2 ? q3s0 : u4; + u4 = c3 ? q3s1 : u4; - u5 = c1 ? q4s0 : q1s1; - u5 = c2 ? q4s1 : u5; - u5 = c3 ? q2s0 : u5; + u5 = c1 ? q3s0 : q2s1; + u5 = c2 ? q3s1 : u5; + u5 = c3 ? q4s0 : u5; - u6 = c1 ? q4s1 : q4s0; - u6 = c2 ? q2s0 : u6; - u6 = c3 ? q2s1 : u6; + u6 = c1 ? q3s1 : q3s0; + u6 = c2 ? q4s0 : u6; + u6 = c3 ? q4s1 : u6; __CLC_UINTN v0 = bytealign(u1, u0, j); __CLC_UINTN v1 = bytealign(u2, u1, j); @@ -142,7 +142,8 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, __CLC_AS_DOUBLEN(__CLC_CONVERT_ULONGN((x)) & 0xFFFFFFFF | \ __CLC_CONVERT_ULONGN((y)) << 32) - __CLC_UINTN ua = __CLC_CONVERT_UINTN(1023 + 52 - i) << 20; + __CLC_UINTN ua = __CLC_CONVERT_UINTN(EXPBIAS_DP64 + EXPSHIFTBITS_DP64 - i) + << 20; __CLC_DOUBLEN a = doublen_lohi((__CLC_ULONGN)0, ua); __CLC_DOUBLEN p0 = doublen_lohi(v0, ua | (v1 & 0xffffU)) - a; ua += 0x03000000U; >From f629943b86bab7a283979a15adc15633c68e882f Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 12 May 2025 11:17:17 +0100 Subject: [PATCH 3/3] fix formatting --- libclc/clc/lib/generic/math/clc_tables.cl | 24 +++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/libclc/clc/lib/generic/math/clc_tables.cl b/libclc/clc/lib/generic/math/clc_tables.cl index 05053aea7bc12..7db00532c8be5 100644 --- a/libclc/clc/lib/generic/math/clc_tables.cl +++ b/libclc/clc/lib/generic/math/clc_tables.cl @@ -1649,19 +1649,17 @@ CLC_TABLE_FUNCTION(double, COSH_TBL_HEAD, cosh_tbl_head); CLC_TABLE_FUNCTION(double, COSH_TBL_TAIL, cosh_tbl_tail); DECLARE_TABLE(uchar, PIBITS_TBL, ) = { - 224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175, - 169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31, - 235, 158, 212, 181, 168, 127, 121, 154, 253, 24, 61, 221, 38, - 44, 159, 60, 251, 217, 180, 125, 180, 41, 104, 45, 70, 188, - 188, 63, 96, 22, 120, 255, 95, 226, 127, 236, 160, 228, 247, - 46, 126, 17, 114, 210, 231, 76, 13, 230, 88, 71, 230, 4, 249, - 125, 209, 154, 192, 113, 166, 19, 18, 237, 186, 212, 215, 8, - 162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, 39, - 168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21, - 239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109, - 3, 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13, - 230, 139, 2, 0, 0, 0, 0, 0, 0, 0 -}; + 224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175, 169, + 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31, 235, 158, 212, + 181, 168, 127, 121, 154, 253, 24, 61, 221, 38, 44, 159, 60, 251, 217, + 180, 125, 180, 41, 104, 45, 70, 188, 188, 63, 96, 22, 120, 255, 95, + 226, 127, 236, 160, 228, 247, 46, 126, 17, 114, 210, 231, 76, 13, 230, + 88, 71, 230, 4, 249, 125, 209, 154, 192, 113, 166, 19, 18, 237, 186, + 212, 215, 8, 162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, + 39, 168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21, + 239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109, 3, + 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13, 230, 139, + 2, 0, 0, 0, 0, 0, 0, 0}; _CLC_DEF _CLC_OVERLOAD ulong TABLE_MANGLE(pibits_tbl)(int idx) { return *(__constant ulong *)(PIBITS_TBL + idx); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits