Author: Fraser Cormack
Date: 2025-04-01T18:15:37+01:00
New Revision: f14ff59da7f98a405999bcc8481b20446de0d0cd
URL: https://github.com/llvm/llvm-project/commit/f14ff59da7f98a405999bcc8481b20446de0d0cd DIFF: https://github.com/llvm/llvm-project/commit/f14ff59da7f98a405999bcc8481b20446de0d0cd.diff LOG: [libclc] Move exp, exp2 and expm1 to the CLC library (#133932) These all share the use of a common helper function so are handled in one go. These builtins are also now vectorized. Added: libclc/clc/include/clc/math/clc_exp.h libclc/clc/include/clc/math/clc_exp2.h libclc/clc/include/clc/math/clc_exp_helper.h libclc/clc/include/clc/math/clc_exp_helper.inc libclc/clc/include/clc/math/clc_expm1.h libclc/clc/lib/generic/math/clc_exp.cl libclc/clc/lib/generic/math/clc_exp.inc libclc/clc/lib/generic/math/clc_exp2.cl libclc/clc/lib/generic/math/clc_exp2.inc libclc/clc/lib/generic/math/clc_exp_helper.cl libclc/clc/lib/generic/math/clc_exp_helper.inc libclc/clc/lib/generic/math/clc_expm1.cl libclc/clc/lib/generic/math/clc_expm1.inc Modified: libclc/clc/lib/generic/SOURCES libclc/clspv/lib/SOURCES libclc/generic/lib/SOURCES libclc/generic/lib/math/exp.cl libclc/generic/lib/math/exp2.cl libclc/generic/lib/math/expm1.cl libclc/spirv/lib/SOURCES Removed: libclc/generic/lib/math/exp_helper.cl libclc/generic/lib/math/exp_helper.h ################################################################################ diff --git a/libclc/generic/lib/math/exp_helper.h b/libclc/clc/include/clc/math/clc_exp.h similarity index 60% rename from libclc/generic/lib/math/exp_helper.h rename to libclc/clc/include/clc/math/clc_exp.h index 84a8febb4bb12..00b5a7f69779a 100644 --- a/libclc/generic/lib/math/exp_helper.h +++ b/libclc/clc/include/clc/math/clc_exp.h @@ -6,9 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifdef cl_khr_fp64 +#ifndef __CLC_MATH_CLC_EXP_H__ +#define __CLC_MATH_CLC_EXP_H__ -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n); +#define __CLC_BODY 
<clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_exp -#endif +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_EXP_H__ diff --git a/libclc/clc/include/clc/math/clc_exp2.h b/libclc/clc/include/clc/math/clc_exp2.h new file mode 100644 index 0000000000000..20ee54b0a2755 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_exp2.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_EXP2_H__ +#define __CLC_MATH_CLC_EXP2_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_exp2 + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_EXP2_H__ diff --git a/libclc/clc/include/clc/math/clc_exp_helper.h b/libclc/clc/include/clc/math/clc_exp_helper.h new file mode 100644 index 0000000000000..8a2db5de16764 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_exp_helper.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_EXP_HELPER +#define __CLC_MATH_CLC_EXP_HELPER + +#define __DOUBLE_ONLY +#define __CLC_BODY <clc/math/clc_exp_helper.inc> + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __DOUBLE_ONLY + +#endif // __CLC_MATH_CLC_EXP_HELPER diff --git a/libclc/clc/include/clc/math/clc_exp_helper.inc b/libclc/clc/include/clc/math/clc_exp_helper.inc new file mode 100644 index 0000000000000..cdf650405c815 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_exp_helper.inc @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE __clc_exp_helper(__CLC_GENTYPE x, + __CLC_GENTYPE x_min, + __CLC_GENTYPE x_max, + __CLC_GENTYPE r, + __CLC_INTN n); diff --git a/libclc/clc/include/clc/math/clc_expm1.h b/libclc/clc/include/clc/math/clc_expm1.h new file mode 100644 index 0000000000000..0359c3916f1c5 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_expm1.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_EXPM1_H__ +#define __CLC_MATH_CLC_EXPM1_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_expm1 + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_EXPM1_H__ diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index becfa3ff6dbed..8c8932e722693 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -32,7 +32,11 @@ math/clc_ceil.cl math/clc_copysign.cl math/clc_cospi.cl math/clc_ep_log.cl +math/clc_exp.cl math/clc_exp10.cl +math/clc_exp2.cl +math/clc_expm1.cl +math/clc_exp_helper.cl math/clc_fabs.cl math/clc_fma.cl math/clc_fmod.cl diff --git a/libclc/clc/lib/generic/math/clc_exp.cl b/libclc/clc/lib/generic/math/clc_exp.cl new file mode 100644 index 0000000000000..6ff452721881c --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_exp.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_exp_helper.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_mad.h> +#include <clc/math/math.h> +#include <clc/relational/clc_isnan.h> + +#define __CLC_BODY <clc_exp.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_exp.inc b/libclc/clc/lib/generic/math/clc_exp.inc new file mode 100644 index 0000000000000..5057bf8034e92 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_exp.inc @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) { + // Reduce x + const __CLC_GENTYPE ln2HI = 0x1.62e300p-1f; + const __CLC_GENTYPE ln2LO = 0x1.2fefa2p-17f; + const __CLC_GENTYPE invln2 = 0x1.715476p+0f; + + __CLC_GENTYPE fhalF = x < 0.0f ? 
-0.5f : 0.5f; + __CLC_INTN p = __CLC_CONVERT_INTN(__clc_mad(x, invln2, fhalF)); + __CLC_GENTYPE fp = __CLC_CONVERT_GENTYPE(p); + __CLC_GENTYPE hi = __clc_mad(fp, -ln2HI, x); // t*ln2HI is exact here + __CLC_GENTYPE lo = -fp * ln2LO; + + // Evaluate poly + __CLC_GENTYPE t = hi + lo; + __CLC_GENTYPE tt = t * t; + __CLC_GENTYPE v = __clc_mad( + tt, + -__clc_mad( + tt, + __clc_mad(tt, + __clc_mad(tt, + __clc_mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f), + 0x1.1566aap-14f), + -0x1.6c16c2p-9f), + 0x1.555556p-3f), + t); + + __CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi); + + // Scale by 2^p + __CLC_GENTYPE r = __CLC_AS_GENTYPE(__CLC_AS_INTN(y) + (p << 23)); + + // ln(largest_normal) = 88.72283905206835305366 + const __CLC_GENTYPE ulim = 0x1.62e430p+6f; + // ln(smallest_normal) = -87.33654475055310898657 + const __CLC_GENTYPE llim = -0x1.5d589ep+6f; + + r = x < llim ? 0.0f : r; + r = x < ulim ? r : __CLC_AS_GENTYPE((__CLC_UINTN)0x7f800000); + return __clc_isnan(x) ? x : r; +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) { + + const __CLC_GENTYPE X_MIN = -0x1.74910d52d3051p+9; // -1075*ln(2) + const __CLC_GENTYPE X_MAX = 0x1.62e42fefa39efp+9; // 1024*ln(2) + const __CLC_GENTYPE R_64_BY_LOG2 = 0x1.71547652b82fep+6; // 64/ln(2) + const __CLC_GENTYPE R_LOG2_BY_64_LD = 0x1.62e42fefa0000p-7; // head ln(2)/64 + const __CLC_GENTYPE R_LOG2_BY_64_TL = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64 + + __CLC_INTN n = __CLC_CONVERT_INTN(x * R_64_BY_LOG2); + __CLC_GENTYPE r = + __clc_fma(-R_LOG2_BY_64_TL, __CLC_CONVERT_GENTYPE(n), + __clc_fma(-R_LOG2_BY_64_LD, __CLC_CONVERT_GENTYPE(n), x)); + return __clc_exp_helper(x, X_MIN, X_MAX, r, n); +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_exp(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_exp2.cl b/libclc/clc/lib/generic/math/clc_exp2.cl new file 
mode 100644 index 0000000000000..9635f84e5a9a6 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_exp2.cl @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_exp_helper.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_mad.h> +#include <clc/math/clc_rint.h> +#include <clc/math/math.h> +#include <clc/relational/clc_isnan.h> + +#define __CLC_BODY <clc_exp2.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_exp2.inc b/libclc/clc/lib/generic/math/clc_exp2.inc new file mode 100644 index 0000000000000..6da361a43ed4c --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_exp2.inc @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) { + // Reduce x + const __CLC_GENTYPE ln2HI = 0x1.62e300p-1f; + const __CLC_GENTYPE ln2LO = 0x1.2fefa2p-17f; + + __CLC_GENTYPE t = __clc_rint(x); + __CLC_INTN p = __CLC_CONVERT_INTN(t); + __CLC_GENTYPE tt = x - t; + __CLC_GENTYPE hi = tt * ln2HI; + __CLC_GENTYPE lo = tt * ln2LO; + + // Evaluate poly + t = hi + lo; + tt = t * t; + __CLC_GENTYPE v = __clc_mad( + tt, + -__clc_mad( + tt, + __clc_mad(tt, + __clc_mad(tt, + __clc_mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f), + 0x1.1566aap-14f), + -0x1.6c16c2p-9f), + 0x1.555556p-3f), + t); + + __CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi); + + // Scale by 2^p + __CLC_GENTYPE r = __CLC_AS_FLOATN(__CLC_AS_INTN(y) + (p << 23)); + + const __CLC_GENTYPE ulim = 128.0f; + const __CLC_GENTYPE llim = -126.0f; + + r = x < llim ? 0.0f : r; + r = x < ulim ? r : __CLC_AS_FLOATN((__CLC_UINTN)0x7f800000); + return __clc_isnan(x) ? 
x : r; +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) { + const __CLC_GENTYPE R_LN2 = 0x1.62e42fefa39efp-1; // ln(2) + const __CLC_GENTYPE R_1_BY_64 = 1.0 / 64.0; + + __CLC_INTN n = __CLC_CONVERT_INTN(x * 64.0); + __CLC_GENTYPE r = R_LN2 * __clc_fma(-R_1_BY_64, __CLC_CONVERT_GENTYPE(n), x); + + return __clc_exp_helper(x, -1074.0, 1024.0, r, n); +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_exp2(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_exp_helper.cl b/libclc/clc/lib/generic/math/clc_exp_helper.cl new file mode 100644 index 0000000000000..92ff8f7fe4e6f --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_exp_helper.cl @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_ldexp.h> +#include <clc/math/math.h> +#include <clc/math/tables.h> +#include <clc/relational/clc_isnan.h> + +#define __DOUBLE_ONLY +#define __CLC_BODY <clc_exp_helper.inc> + +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_exp_helper.inc b/libclc/clc/lib/generic/math/clc_exp_helper.inc new file mode 100644 index 0000000000000..70ced7e9ea485 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_exp_helper.inc @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_exp_helper(__CLC_GENTYPE x, + __CLC_GENTYPE x_min, + __CLC_GENTYPE x_max, + __CLC_GENTYPE r, + __CLC_INTN n) { + + __CLC_INTN j = n & 0x3f; + __CLC_INTN m = n >> 6; + + // 6 term tail of Taylor expansion of e^r + __CLC_GENTYPE z2 = + r * __clc_fma( + r, + __clc_fma(r, + __clc_fma(r, + __clc_fma(r, + __clc_fma(r, 0x1.6c16c16c16c17p-10, + 0x1.1111111111111p-7), + 0x1.5555555555555p-5), + 0x1.5555555555555p-3), + 0x1.0000000000000p-1), + 1.0); + + __CLC_GENTYPE tv0 = USE_TABLE(two_to_jby64_ep_tbl_head, j); + __CLC_GENTYPE tv1 = USE_TABLE(two_to_jby64_ep_tbl_tail, j); + z2 = __clc_fma(tv0 + tv1, z2, tv1) + tv0; + + __CLC_INTN small_value = + (m < -1022) || ((m == -1022) && __CLC_CONVERT_INTN(z2 < 1.0)); + + __CLC_INTN n1 = m >> 2; + __CLC_INTN n2 = m - n1; + __CLC_GENTYPE z3 = + z2 * __CLC_AS_GENTYPE((__CLC_CONVERT_LONGN(n1) + 1023) << 52); + z3 *= __CLC_AS_GENTYPE((__CLC_CONVERT_LONGN(n2) + 1023) << 52); + + z2 = __clc_ldexp(z2, m); + z2 = __CLC_CONVERT_LONGN(small_value) ? z3 : z2; + + z2 = __clc_isnan(x) ? x : z2; + + z2 = x > x_max ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z2; + z2 = x < x_min ? 0.0 : z2; + + return z2; +} diff --git a/libclc/clc/lib/generic/math/clc_expm1.cl b/libclc/clc/lib/generic/math/clc_expm1.cl new file mode 100644 index 0000000000000..8695b46eb90ea --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_expm1.cl @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_exp_helper.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_mad.h> +#include <clc/math/math.h> +#include <clc/math/tables.h> +#include <clc/relational/clc_isnan.h> + +#define __CLC_BODY <clc_expm1.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_expm1.inc b/libclc/clc/lib/generic/math/clc_expm1.inc new file mode 100644 index 0000000000000..6abee9b3f0cc9 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_expm1.inc @@ -0,0 +1,169 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/* Refer to the exp routine for the underlying algorithm */ +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_expm1(__CLC_GENTYPE x) { + // 128*log2 : 88.722839111673 + const __CLC_GENTYPE X_MAX = 0x1.62e42ep+6f; + // -149*log2 : -103.27892990343184 + const __CLC_GENTYPE X_MIN = -0x1.9d1da0p+6f; + // 64/log2 : 92.332482616893657 + const __CLC_GENTYPE R_64_BY_LOG2 = 0x1.715476p+6f; + // log2/64 lead: 0.0108032227 + const __CLC_GENTYPE R_LOG2_BY_64_LD = 0x1.620000p-7f; + // log2/64 tail: 0.0000272020388 + const __CLC_GENTYPE R_LOG2_BY_64_TL = 0x1.c85fdep-16f; + + __CLC_UINTN xi = __CLC_AS_UINTN(x); + __CLC_INTN n = __CLC_CONVERT_INTN(x * R_64_BY_LOG2); + __CLC_GENTYPE fn = __CLC_CONVERT_GENTYPE(n); + + __CLC_INTN j = n & 0x3f; + __CLC_INTN m = n >> 6; + + __CLC_GENTYPE r = + __clc_mad(fn, -R_LOG2_BY_64_TL, __clc_mad(fn, -R_LOG2_BY_64_LD, x)); 
+ + // Truncated Taylor series + __CLC_GENTYPE z2 = __clc_mad( + r * r, __clc_mad(r, __clc_mad(r, 0x1.555556p-5f, 0x1.555556p-3f), 0.5f), + r); + + __CLC_GENTYPE m2 = __CLC_AS_GENTYPE((m + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); + __CLC_GENTYPE exp_head = USE_TABLE(exp_tbl_ep_head, j); + __CLC_GENTYPE exp_tail = USE_TABLE(exp_tbl_ep_tail, j); + + __CLC_GENTYPE two_to_jby64_h = exp_head * m2; + __CLC_GENTYPE two_to_jby64_t = exp_tail * m2; + __CLC_GENTYPE two_to_jby64 = two_to_jby64_h + two_to_jby64_t; + + z2 = __clc_mad(z2, two_to_jby64, two_to_jby64_t) + (two_to_jby64_h - 1.0f); + // Make subnormals work + z2 = x == 0.f ? x : z2; + z2 = x < X_MIN || m < -24 ? -1.0f : z2; + z2 = x > X_MAX ? __CLC_AS_GENTYPE((__CLC_UINTN)PINFBITPATT_SP32) : z2; + z2 = __clc_isnan(x) ? x : z2; + + return z2; +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_expm1(__CLC_GENTYPE x) { + const __CLC_GENTYPE max_expm1_arg = 709.8; + const __CLC_GENTYPE min_expm1_arg = -37.42994775023704; + // 0x3FCC8FF7C79A9A22 = log(1+1/4) + const __CLC_GENTYPE log_OnePlus_OneByFour = 0.22314355131420976; + // 0xBFD269621134DB93 = log(1-1/4) + const __CLC_GENTYPE log_OneMinus_OneByFour = -0.28768207245178096; + const __CLC_GENTYPE sixtyfour_by_lnof2 = + 92.33248261689366; // 0x40571547652b82fe + const __CLC_GENTYPE lnof2_by_64_head = + 0.010830424696223417; // 0x3f862e42fefa0000 + const __CLC_GENTYPE lnof2_by_64_tail = + 2.5728046223276688e-14; // 0x3d1cf79abc9e3b39 + + // First, assume log(1-1/4) < x < log(1+1/4) i.e -0.28768 < x < 0.22314 + __CLC_GENTYPE u = __CLC_AS_GENTYPE(__CLC_AS_ULONGN(x) & 0xffffffffff000000UL); + __CLC_GENTYPE v = x - u; + __CLC_GENTYPE y = u * u * 0.5; + __CLC_GENTYPE z = v * (x + u) * 0.5; + + __CLC_GENTYPE q = __clc_fma( + x, + __clc_fma( + x, + __clc_fma( + x, + __clc_fma( + x, + __clc_fma( + x, + __clc_fma(x, + __clc_fma(x, + __clc_fma(x, 2.4360682937111612e-8, + 2.7582184028154370e-7), + 2.7558212415361945e-6), + 2.4801576918453420e-5), + 
1.9841269447671544e-4), + 1.3888888890687830e-3), + 8.3333333334012270e-3), + 4.1666666666665560e-2), + 1.6666666666666632e-1); + q *= x * x * x; + + __CLC_GENTYPE z1g = (u + y) + (q + (v + z)); + __CLC_GENTYPE z1 = x + (y + (q + z)); + z1 = y >= 0x1.0p-7 ? z1g : z1; + + // Now assume outside interval around 0 + __CLC_INTN n = __CLC_CONVERT_INTN(x * sixtyfour_by_lnof2); + __CLC_INTN j = n & 0x3f; + __CLC_INTN m = n >> 6; + + __CLC_GENTYPE f1 = USE_TABLE(two_to_jby64_ep_tbl_head, j); + __CLC_GENTYPE f2 = USE_TABLE(two_to_jby64_ep_tbl_tail, j); + __CLC_GENTYPE f = f1 + f2; + + __CLC_GENTYPE dn = __CLC_CONVERT_GENTYPE(-n); + __CLC_GENTYPE r = + __clc_fma(dn, lnof2_by_64_tail, __clc_fma(dn, lnof2_by_64_head, x)); + + q = __clc_fma(r, + __clc_fma(r, + __clc_fma(r, + __clc_fma(r, 1.38889490863777199667e-03, + 8.33336798434219616221e-03), + 4.16666666662260795726e-02), + 1.66666666665260878863e-01), + 5.00000000000000008883e-01); + q = __clc_fma(r * r, q, r); + + __CLC_GENTYPE twopm = __CLC_AS_GENTYPE(__CLC_CONVERT_LONGN(m + EXPBIAS_DP64) + << EXPSHIFTBITS_DP64); + __CLC_GENTYPE twopmm = __CLC_AS_GENTYPE(__CLC_CONVERT_LONGN(EXPBIAS_DP64 - m) + << EXPSHIFTBITS_DP64); + + // Computations for m > 52, including where result is close to Inf + __CLC_ULONGN uval = __CLC_AS_ULONGN(0x1.0p+1023 * (f1 + (f * q + (f2)))); + __CLC_INTN e = __CLC_CONVERT_INTN(uval >> EXPSHIFTBITS_DP64) + 1; + + __CLC_GENTYPE zme1024 = __CLC_AS_GENTYPE( + (__CLC_CONVERT_ULONGN(e) << EXPSHIFTBITS_DP64) | (uval & MANTBITS_DP64)); + zme1024 = __CLC_CONVERT_LONGN(e == 2047) + ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) + : zme1024; + + __CLC_GENTYPE zmg52 = twopm * (f1 + __clc_fma(f, q, f2 - twopmm)); + zmg52 = __CLC_CONVERT_LONGN(m == 1024) ? zme1024 : zmg52; + + // For m < 53 + __CLC_GENTYPE zml53 = + twopm * ((f1 - twopmm) + __clc_fma(f1, q, f2 * (1.0 + q))); + + // For m < -7 + __CLC_GENTYPE zmln7 = __clc_fma(twopm, f1 + __clc_fma(f, q, f2), -1.0); + + z = __CLC_CONVERT_LONGN(m < 53) ? 
zml53 : zmg52; + z = __CLC_CONVERT_LONGN(m < -7) ? zmln7 : z; + z = x > log_OneMinus_OneByFour && x < log_OnePlus_OneByFour ? z1 : z; + z = x > max_expm1_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z; + z = x < min_expm1_arg ? -1.0 : z; + + return z; +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_expm1(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_expm1(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/clspv/lib/SOURCES b/libclc/clspv/lib/SOURCES index 64122395438aa..d2fea9d586287 100644 --- a/libclc/clspv/lib/SOURCES +++ b/libclc/clspv/lib/SOURCES @@ -25,7 +25,6 @@ subnormal_config.cl ../../generic/lib/math/exp.cl ../../generic/lib/math/exp10.cl ../../generic/lib/math/exp2.cl -../../generic/lib/math/exp_helper.cl ../../generic/lib/math/expm1.cl ../../generic/lib/math/fdim.cl ../../generic/lib/math/fmod.cl diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index a9dc2304c0d0e..5f473ff8b9424 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -97,7 +97,6 @@ math/cospi.cl math/erf.cl math/erfc.cl math/exp.cl -math/exp_helper.cl math/expm1.cl math/exp2.cl math/exp10.cl diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl index bdf8023f6ec31..cebd630e52ab7 100644 --- a/libclc/generic/lib/math/exp.cl +++ b/libclc/generic/lib/math/exp.cl @@ -7,77 +7,8 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> +#include <clc/math/clc_exp.h> -_CLC_OVERLOAD _CLC_DEF float exp(float x) { - - // Reduce x - const float ln2HI = 0x1.62e300p-1f; - const float ln2LO = 0x1.2fefa2p-17f; - const float invln2 = 0x1.715476p+0f; - - float fhalF = x < 0.0f ? 
-0.5f : 0.5f; - int p = mad(x, invln2, fhalF); - float fp = (float)p; - float hi = mad(fp, -ln2HI, x); // t*ln2HI is exact here - float lo = -fp*ln2LO; - - // Evaluate poly - float t = hi + lo; - float tt = t*t; - float v = mad(tt, - -mad(tt, - mad(tt, - mad(tt, - mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f), - 0x1.1566aap-14f), - -0x1.6c16c2p-9f), - 0x1.555556p-3f), - t); - - float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi); - - // Scale by 2^p - float r = as_float(as_int(y) + (p << 23)); - - const float ulim = 0x1.62e430p+6f; // ln(largest_normal) = 88.72283905206835305366 - const float llim = -0x1.5d589ep+6f; // ln(smallest_normal) = -87.33654475055310898657 - - r = x < llim ? 0.0f : r; - r = x < ulim ? r : as_float(0x7f800000); - return isnan(x) ? x : r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp, float) - -#ifdef cl_khr_fp64 - -#include "exp_helper.h" - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double exp(double x) { - - const double X_MIN = -0x1.74910d52d3051p+9; // -1075*ln(2) - const double X_MAX = 0x1.62e42fefa39efp+9; // 1024*ln(2) - const double R_64_BY_LOG2 = 0x1.71547652b82fep+6; // 64/ln(2) - const double R_LOG2_BY_64_LD = 0x1.62e42fefa0000p-7; // head ln(2)/64 - const double R_LOG2_BY_64_TL = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64 - - int n = convert_int(x * R_64_BY_LOG2); - double r = fma(-R_LOG2_BY_64_TL, (double)n, fma(-R_LOG2_BY_64_LD, (double)n, x)); - return __clc_exp_helper(x, X_MIN, X_MAX, r, n); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(exp) - -#endif +#define FUNCTION exp +#define __CLC_BODY <clc/shared/unary_def.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/exp2.cl b/libclc/generic/lib/math/exp2.cl index 611a40d4a8e68..465c39174a734 100644 --- a/libclc/generic/lib/math/exp2.cl +++ 
b/libclc/generic/lib/math/exp2.cl @@ -7,65 +7,8 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> +#include <clc/math/clc_exp2.h> -_CLC_OVERLOAD _CLC_DEF float exp2(float x) { - - // Reduce x - const float ln2HI = 0x1.62e300p-1f; - const float ln2LO = 0x1.2fefa2p-17f; - - float t = rint(x); - int p = (int)t; - float tt = x - t; - float hi = tt * ln2HI; - float lo = tt * ln2LO; - - // Evaluate poly - t = hi + lo; - tt = t*t; - float v = mad(tt, - -mad(tt, - mad(tt, - mad(tt, - mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f), - 0x1.1566aap-14f), - -0x1.6c16c2p-9f), - 0x1.555556p-3f), - t); - - float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi); - - // Scale by 2^p - float r = as_float(as_int(y) + (p << 23)); - - const float ulim = 128.0f; - const float llim = -126.0f; - - r = x < llim ? 0.0f : r; - r = x < ulim ? r : as_float(0x7f800000); - return isnan(x) ? x : r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float) - -#ifdef cl_khr_fp64 - -#include "exp_helper.h" - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double exp2(double x) { - const double R_LN2 = 0x1.62e42fefa39efp-1; // ln(2) - const double R_1_BY_64 = 1.0 / 64.0; - - int n = convert_int(x * 64.0); - double r = R_LN2 * fma(-R_1_BY_64, (double)n, x); - return __clc_exp_helper(x, -1074.0, 1024.0, r, n); -} - - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double) - -#endif +#define FUNCTION exp2 +#define __CLC_BODY <clc/shared/unary_def.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/exp_helper.cl b/libclc/generic/lib/math/exp_helper.cl deleted file mode 100644 index b413228719bfb..0000000000000 --- a/libclc/generic/lib/math/exp_helper.cl +++ /dev/null @@ -1,55 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/clc.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) { - - int j = n & 0x3f; - int m = n >> 6; - - // 6 term tail of Taylor expansion of e^r - double z2 = r * fma(r, - fma(r, - fma(r, - fma(r, - fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7), - 0x1.5555555555555p-5), - 0x1.5555555555555p-3), - 0x1.0000000000000p-1), - 1.0); - - double tv0 = USE_TABLE(two_to_jby64_ep_tbl_head, j); - double tv1 = USE_TABLE(two_to_jby64_ep_tbl_tail, j); - z2 = fma(tv0 + tv1, z2, tv1) + tv0; - - int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0)); - - int n1 = m >> 2; - int n2 = m-n1; - double z3= z2 * as_double(((long)n1 + 1023) << 52); - z3 *= as_double(((long)n2 + 1023) << 52); - - z2 = ldexp(z2, m); - z2 = small_value ? z3: z2; - - z2 = isnan(x) ? x : z2; - - z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2; - z2 = x < x_min ? 
0.0 : z2; - - return z2; -} - -#endif // cl_khr_fp64 diff --git a/libclc/generic/lib/math/expm1.cl b/libclc/generic/lib/math/expm1.cl index e66020e20fab4..b4eed66d692b4 100644 --- a/libclc/generic/lib/math/expm1.cl +++ b/libclc/generic/lib/math/expm1.cl @@ -7,151 +7,8 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> +#include <clc/math/clc_expm1.h> -/* Refer to the exp routine for the underlying algorithm */ - -_CLC_OVERLOAD _CLC_DEF float expm1(float x) { - const float X_MAX = 0x1.62e42ep+6f; // 128*log2 : 88.722839111673 - const float X_MIN = -0x1.9d1da0p+6f; // -149*log2 : -103.27892990343184 - - const float R_64_BY_LOG2 = 0x1.715476p+6f; // 64/log2 : 92.332482616893657 - const float R_LOG2_BY_64_LD = 0x1.620000p-7f; // log2/64 lead: 0.0108032227 - const float R_LOG2_BY_64_TL = 0x1.c85fdep-16f; // log2/64 tail: 0.0000272020388 - - uint xi = as_uint(x); - int n = (int)(x * R_64_BY_LOG2); - float fn = (float)n; - - int j = n & 0x3f; - int m = n >> 6; - - float r = mad(fn, -R_LOG2_BY_64_TL, mad(fn, -R_LOG2_BY_64_LD, x)); - - // Truncated Taylor series - float z2 = mad(r*r, mad(r, mad(r, 0x1.555556p-5f, 0x1.555556p-3f), 0.5f), r); - - float m2 = as_float((m + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); - float exp_head = USE_TABLE(exp_tbl_ep_head, j); - float exp_tail = USE_TABLE(exp_tbl_ep_tail, j); - - float two_to_jby64_h = exp_head * m2; - float two_to_jby64_t = exp_tail * m2; - float two_to_jby64 = two_to_jby64_h + two_to_jby64_t; - - z2 = mad(z2, two_to_jby64, two_to_jby64_t) + (two_to_jby64_h - 1.0f); - //Make subnormals work - z2 = x == 0.f ? x : z2; - z2 = x < X_MIN | m < -24 ? -1.0f : z2; - z2 = x > X_MAX ? as_float(PINFBITPATT_SP32) : z2; - z2 = isnan(x) ? 
x : z2; - - return z2; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, expm1, float) - -#ifdef cl_khr_fp64 - -#include "exp_helper.h" - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double expm1(double x) { - const double max_expm1_arg = 709.8; - const double min_expm1_arg = -37.42994775023704; - const double log_OnePlus_OneByFour = 0.22314355131420976; //0x3FCC8FF7C79A9A22 = log(1+1/4) - const double log_OneMinus_OneByFour = -0.28768207245178096; //0xBFD269621134DB93 = log(1-1/4) - const double sixtyfour_by_lnof2 = 92.33248261689366; //0x40571547652b82fe - const double lnof2_by_64_head = 0.010830424696223417; //0x3f862e42fefa0000 - const double lnof2_by_64_tail = 2.5728046223276688e-14; //0x3d1cf79abc9e3b39 - - // First, assume log(1-1/4) < x < log(1+1/4) i.e -0.28768 < x < 0.22314 - double u = as_double(as_ulong(x) & 0xffffffffff000000UL); - double v = x - u; - double y = u * u * 0.5; - double z = v * (x + u) * 0.5; - - double q = fma(x, - fma(x, - fma(x, - fma(x, - fma(x, - fma(x, - fma(x, - fma(x,2.4360682937111612e-8, 2.7582184028154370e-7), - 2.7558212415361945e-6), - 2.4801576918453420e-5), - 1.9841269447671544e-4), - 1.3888888890687830e-3), - 8.3333333334012270e-3), - 4.1666666666665560e-2), - 1.6666666666666632e-1); - q *= x * x * x; - - double z1g = (u + y) + (q + (v + z)); - double z1 = x + (y + (q + z)); - z1 = y >= 0x1.0p-7 ? 
z1g : z1; - - // Now assume outside interval around 0 - int n = (int)(x * sixtyfour_by_lnof2); - int j = n & 0x3f; - int m = n >> 6; - - double f1 = USE_TABLE(two_to_jby64_ep_tbl_head, j); - double f2 = USE_TABLE(two_to_jby64_ep_tbl_tail, j); - double f = f1 + f2; - - double dn = -n; - double r = fma(dn, lnof2_by_64_tail, fma(dn, lnof2_by_64_head, x)); - - q = fma(r, - fma(r, - fma(r, - fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03), - 4.16666666662260795726e-02), - 1.66666666665260878863e-01), - 5.00000000000000008883e-01); - q = fma(r*r, q, r); - - double twopm = as_double((long)(m + EXPBIAS_DP64) << EXPSHIFTBITS_DP64); - double twopmm = as_double((long)(EXPBIAS_DP64 - m) << EXPSHIFTBITS_DP64); - - // Computations for m > 52, including where result is close to Inf - ulong uval = as_ulong(0x1.0p+1023 * (f1 + (f * q + (f2)))); - int e = (int)(uval >> EXPSHIFTBITS_DP64) + 1; - - double zme1024 = as_double(((long)e << EXPSHIFTBITS_DP64) | (uval & MANTBITS_DP64)); - zme1024 = e == 2047 ? as_double(PINFBITPATT_DP64) : zme1024; - - double zmg52 = twopm * (f1 + fma(f, q, f2 - twopmm)); - zmg52 = m == 1024 ? zme1024 : zmg52; - - // For m < 53 - double zml53 = twopm * ((f1 - twopmm) + fma(f1, q, f2*(1.0 + q))); - - // For m < -7 - double zmln7 = fma(twopm, f1 + fma(f, q, f2), -1.0); - - z = m < 53 ? zml53 : zmg52; - z = m < -7 ? zmln7 : z; - z = x > log_OneMinus_OneByFour & x < log_OnePlus_OneByFour ? z1 : z; - z = x > max_expm1_arg ? as_double(PINFBITPATT_DP64) : z; - z = x < min_expm1_arg ? 
-1.0 : z; - - return z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(expm1) - -#endif +#define FUNCTION expm1 +#define __CLC_BODY <clc/shared/unary_def.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/spirv/lib/SOURCES b/libclc/spirv/lib/SOURCES index 5c6051398c58f..5446fe13a6d93 100644 --- a/libclc/spirv/lib/SOURCES +++ b/libclc/spirv/lib/SOURCES @@ -35,7 +35,6 @@ subnormal_config.cl ../../generic/lib/math/erf.cl ../../generic/lib/math/erfc.cl ../../generic/lib/math/exp.cl -../../generic/lib/math/exp_helper.cl ../../generic/lib/math/expm1.cl ../../generic/lib/math/exp2.cl ../../generic/lib/math/exp10.cl _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits