https://github.com/frasercrmck created 
https://github.com/llvm/llvm-project/pull/139527

This commit moves the remaining FP64 sin and cos helper functions to the CLC 
library. As a consequence, it formally moves all sin, cos and sincos builtins 
to the CLC library. Previously, the FP16 and FP32 were nominally there but 
still in the OpenCL layer while waiting for the FP64 ones.

The FP64 builtins are now vectorized as the FP16 and FP32 ones were earlier.

One helper table had to be changed. It was previously a table of bytes loaded 
by each work-item as uint4. Since this doesn't vectorize well, the table was 
split to load two ulongNs per work-item. While this might not be as efficient 
on some devices, one mitigating factor is that we were previously loading 48 
bytes per work-item in total, but only using 40 of them. With this commit we 
only load the bytes we need.

>From 875c5fa90fff79456da734ae4be1e4590df6d127 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Thu, 1 May 2025 17:24:58 +0100
Subject: [PATCH 1/2] [libclc] Move sin, cos & sincos to CLC library

This commit also vectorizes these builtins along the way.
---
 libclc/clc/include/clc/math/clc_cos.h         |  19 ++
 libclc/clc/include/clc/math/clc_sin.h         |  19 ++
 libclc/clc/include/clc/math/clc_sincos.h      |  19 ++
 .../clc/include/clc/math/clc_sincos_helpers.h |   7 +
 .../clc/math/clc_sincos_helpers_fp64.inc      |  17 ++
 libclc/clc/include/clc/math/tables.h          |   2 +-
 libclc/clc/lib/generic/SOURCES                |   3 +
 libclc/clc/lib/generic/math/clc_cos.cl        |  21 ++
 libclc/clc/lib/generic/math/clc_cos.inc       |  63 ++++
 libclc/clc/lib/generic/math/clc_sin.cl        |  25 ++
 libclc/clc/lib/generic/math/clc_sin.inc       |  68 +++++
 libclc/clc/lib/generic/math/clc_sincos.cl     |  14 +
 .../lib/generic/math/clc_sincos.inc}          |   8 +-
 .../lib/generic/math/clc_sincos_helpers.cl    |  24 ++
 .../generic/math/clc_sincos_helpers_fp64.inc  | 234 ++++++++++++++
 libclc/clc/lib/generic/math/clc_tables.cl     |  62 ++++
 libclc/clspv/lib/SOURCES                      |   2 -
 libclc/generic/include/clc/math/sincos.h      |   4 +-
 libclc/generic/include/clc/math/sincos.inc    |  14 -
 libclc/generic/lib/SOURCES                    |   2 -
 libclc/generic/lib/math/clc_tan.cl            |   1 -
 libclc/generic/lib/math/cos.cl                |  42 +--
 libclc/generic/lib/math/cos.inc               |  34 ---
 libclc/generic/lib/math/sin.cl                |  41 +--
 libclc/generic/lib/math/sin.inc               |  38 ---
 libclc/generic/lib/math/sincos.cl             |   4 +-
 libclc/generic/lib/math/sincos_helpers.cl     | 285 ------------------
 libclc/generic/lib/math/sincos_helpers.h      |  24 --
 libclc/generic/lib/math/tables.cl             |  30 --
 libclc/spirv/lib/SOURCES                      |   2 -
 30 files changed, 612 insertions(+), 516 deletions(-)
 create mode 100644 libclc/clc/include/clc/math/clc_cos.h
 create mode 100644 libclc/clc/include/clc/math/clc_sin.h
 create mode 100644 libclc/clc/include/clc/math/clc_sincos.h
 create mode 100644 libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
 create mode 100644 libclc/clc/lib/generic/math/clc_cos.cl
 create mode 100644 libclc/clc/lib/generic/math/clc_cos.inc
 create mode 100644 libclc/clc/lib/generic/math/clc_sin.cl
 create mode 100644 libclc/clc/lib/generic/math/clc_sin.inc
 create mode 100644 libclc/clc/lib/generic/math/clc_sincos.cl
 rename libclc/{generic/lib/math/sincos.inc => 
clc/lib/generic/math/clc_sincos.inc} (62%)
 create mode 100644 libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
 delete mode 100644 libclc/generic/include/clc/math/sincos.inc
 delete mode 100644 libclc/generic/lib/math/cos.inc
 delete mode 100644 libclc/generic/lib/math/sin.inc
 delete mode 100644 libclc/generic/lib/math/sincos_helpers.cl
 delete mode 100644 libclc/generic/lib/math/sincos_helpers.h
 delete mode 100644 libclc/generic/lib/math/tables.cl

diff --git a/libclc/clc/include/clc/math/clc_cos.h 
b/libclc/clc/include/clc/math/clc_cos.h
new file mode 100644
index 0000000000000..44681608efc37
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_cos.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_COS_H__
+#define __CLC_MATH_CLC_COS_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_cos
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_COS_H__
diff --git a/libclc/clc/include/clc/math/clc_sin.h 
b/libclc/clc/include/clc/math/clc_sin.h
new file mode 100644
index 0000000000000..de4c722ca123f
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_sin.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_SIN_H__
+#define __CLC_MATH_CLC_SIN_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_sin
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_SIN_H__
diff --git a/libclc/clc/include/clc/math/clc_sincos.h 
b/libclc/clc/include/clc/math/clc_sincos.h
new file mode 100644
index 0000000000000..e26dc7c024c9c
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_sincos.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_SINCOS_H__
+#define __CLC_MATH_CLC_SINCOS_H__
+
+#define __CLC_BODY <clc/math/unary_decl_with_ptr.inc>
+#define __CLC_FUNCTION __clc_sincos
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_SINCOS_H__
diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers.h 
b/libclc/clc/include/clc/math/clc_sincos_helpers.h
index 8029d439f4c62..e3a2e1c0b605c 100644
--- a/libclc/clc/include/clc/math/clc_sincos_helpers.h
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers.h
@@ -16,4 +16,11 @@
 
 #undef __FLOAT_ONLY
 
+#define __DOUBLE_ONLY
+#define __CLC_BODY <clc/math/clc_sincos_helpers_fp64.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __DOUBLE_ONLY
+
 #endif // __CLC_MATH_CLC_SINCOS_HELPERS_H__
diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc 
b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
new file mode 100644
index 0000000000000..09c6e1c965f64
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+_CLC_DECL _CLC_OVERLOAD void
+__clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
+                             private __CLC_DOUBLEN *rr,
+                             private __CLC_INTN *regn);
+
+_CLC_DECL _CLC_OVERLOAD void
+__clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
+                            private __CLC_DOUBLEN *rr,
+                            private __CLC_INTN *regn);
diff --git a/libclc/clc/include/clc/math/tables.h 
b/libclc/clc/include/clc/math/tables.h
index fb172b0b8f221..0fec778b53679 100644
--- a/libclc/clc/include/clc/math/tables.h
+++ b/libclc/clc/include/clc/math/tables.h
@@ -61,7 +61,6 @@
 
 TABLE_FUNCTION_DECL(float2, log2_tbl);
 TABLE_FUNCTION_DECL(float2, log10_tbl);
-TABLE_FUNCTION_DECL(uint4, pibits_tbl);
 
 CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head);
 CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail);
@@ -75,6 +74,7 @@ CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head);
 CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail);
 CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_head);
 CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail);
+CLC_TABLE_FUNCTION_DECL(ulong, pibits_tbl);
 
 #ifdef cl_khr_fp64
 
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 4240e7b08e7d1..4d66c749fc53e 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -32,6 +32,7 @@ math/clc_atanpi.cl
 math/clc_cbrt.cl
 math/clc_ceil.cl
 math/clc_copysign.cl
+math/clc_cos.cl
 math/clc_cosh.cl
 math/clc_cospi.cl
 math/clc_ep_log.cl
@@ -86,6 +87,8 @@ math/clc_rint.cl
 math/clc_rootn.cl
 math/clc_round.cl
 math/clc_rsqrt.cl
+math/clc_sin.cl
+math/clc_sincos.cl
 math/clc_sincos_helpers.cl
 math/clc_sinh.cl
 math/clc_sinpi.cl
diff --git a/libclc/clc/lib/generic/math/clc_cos.cl 
b/libclc/clc/lib/generic/math/clc_cos.cl
new file mode 100644
index 0000000000000..0c9dc287aa0b4
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_cos.cl
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/clcmacro.h>
+#include <clc/float/definitions.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_sincos_helpers.h>
+#include <clc/math/clc_sincos_piby4.h>
+#include <clc/math/math.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/relational/clc_select.h>
+
+#define __CLC_BODY <clc_cos.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_cos.inc 
b/libclc/clc/lib/generic/math/clc_cos.inc
new file mode 100644
index 0000000000000..4b8108c086090
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_cos.inc
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_cos(__CLC_FLOATN x) {
+  __CLC_FLOATN absx = __clc_fabs(x);
+
+  __CLC_FLOATN r0, r1;
+  __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx);
+
+  __CLC_FLOATN ss = -__clc_sinf_piby4(r0, r1);
+  __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1);
+
+  __CLC_FLOATN c = (regn & 1) != 0 ? ss : cc;
+  c = __CLC_AS_FLOATN(__CLC_AS_INTN(c) ^ ((regn > 1) << 31));
+
+  c = __clc_select(c, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x));
+
+  return c;
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(__clc_cos(__CLC_CONVERT_FLOATN(x)));
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) {
+  __CLC_GENTYPE absx = __clc_fabs(x);
+
+  __CLC_BIT_INTN is_medium = absx < 0x1.0p+47;
+
+  __CLC_INTN regn_m, regn_l;
+  __CLC_GENTYPE r_m, r_l, rr_m, rr_l;
+
+  __clc_remainder_piby2_medium(absx, &r_m, &rr_m, &regn_m);
+  __clc_remainder_piby2_large(absx, &r_l, &rr_l, &regn_l);
+
+  __CLC_GENTYPE r = is_medium ? r_m : r_l;
+  __CLC_GENTYPE rr = is_medium ? rr_m : rr_l;
+  __CLC_INTN regn = __CLC_CONVERT_INTN(is_medium) ? regn_m : regn_l;
+
+  __CLC_GENTYPE sinval, cosval;
+  __clc_sincos_piby4(r, rr, &sinval, &cosval);
+  sinval = -sinval;
+
+  __CLC_LONGN c =
+      __CLC_AS_LONGN(__CLC_CONVERT_BIT_INTN((regn & 1) != 0) ? sinval : 
cosval);
+  c ^= __CLC_CONVERT_BIT_INTN(regn > 1) << 63;
+
+  return __clc_isnan(absx) | __clc_isinf(absx) ? __CLC_GENTYPE_NAN
+                                               : __CLC_AS_GENTYPE(c);
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_sin.cl 
b/libclc/clc/lib/generic/math/clc_sin.cl
new file mode 100644
index 0000000000000..0ff9739c6a846
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sin.cl
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/clcmacro.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_sincos_helpers.h>
+#include <clc/math/clc_sincos_piby4.h>
+#include <clc/math/clc_trunc.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/relational/clc_select.h>
+#include <clc/shared/clc_max.h>
+
+#define __CLC_BODY <clc_sin.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_sin.inc 
b/libclc/clc/lib/generic/math/clc_sin.inc
new file mode 100644
index 0000000000000..b4f72eb625eb0
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sin.inc
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_sin(__CLC_FLOATN x) {
+  __CLC_FLOATN absx = __clc_fabs(x);
+
+  __CLC_FLOATN r0, r1;
+  __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx);
+
+  __CLC_FLOATN ss = __clc_sinf_piby4(r0, r1);
+  __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1);
+
+  __CLC_FLOATN s = (regn & 1) != 0 ? cc : ss;
+  s = __CLC_AS_FLOATN(__CLC_AS_INTN(s) ^ ((regn > 1) << 31) ^
+                      (__CLC_AS_INTN(x) ^ __CLC_AS_INTN(absx)));
+
+  s = __clc_select(s, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x));
+
+  // Subnormals
+  s = x == 0.0f ? x : s;
+
+  return s;
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sin(__CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(__clc_sin(__CLC_CONVERT_FLOATN(x)));
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sin(__CLC_GENTYPE x) {
+  __CLC_GENTYPE absx = __clc_fabs(x);
+
+  __CLC_BIT_INTN is_medium = absx < 0x1.0p+47;
+
+  __CLC_INTN regn_m, regn_l;
+  __CLC_GENTYPE r_m, r_l, rr_m, rr_l;
+
+  __clc_remainder_piby2_medium(absx, &r_m, &rr_m, &regn_m);
+  __clc_remainder_piby2_large(absx, &r_l, &rr_l, &regn_l);
+
+  __CLC_GENTYPE r = is_medium ? r_m : r_l;
+  __CLC_GENTYPE rr = is_medium ? rr_m : rr_l;
+  __CLC_INTN regn = __CLC_CONVERT_INTN(is_medium) ? regn_m : regn_l;
+
+  __CLC_GENTYPE sinval, cosval;
+  __clc_sincos_piby4(r, rr, &sinval, &cosval);
+
+  __CLC_LONGN s =
+      __CLC_AS_LONGN(__CLC_CONVERT_BIT_INTN((regn & 1) != 0) ? cosval : 
sinval);
+
+  s ^= (__CLC_CONVERT_BIT_INTN(regn > 1) << 63) ^
+       (__CLC_CONVERT_BIT_INTN(x < 0.0) << 63);
+
+  return __clc_isinf(x) | __clc_isnan(x) ? __CLC_GENTYPE_NAN
+                                         : __CLC_AS_GENTYPE(s);
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_sincos.cl 
b/libclc/clc/lib/generic/math/clc_sincos.cl
new file mode 100644
index 0000000000000..2209a41593a2d
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sincos.cl
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+#include <clc/math/clc_cos.h>
+#include <clc/math/clc_sin.h>
+
+#define __CLC_BODY <clc_sincos.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/sincos.inc 
b/libclc/clc/lib/generic/math/clc_sincos.inc
similarity index 62%
rename from libclc/generic/lib/math/sincos.inc
rename to libclc/clc/lib/generic/math/clc_sincos.inc
index b1d4f88c3d427..1e21b7549e448 100644
--- a/libclc/generic/lib/math/sincos.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos.inc
@@ -6,10 +6,10 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
-  _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
-    *cosval = cos(x); \
-    return sin(x); \
+#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE)                                  
\
+  _CLC_OVERLOAD _CLC_DEF TYPE __clc_sincos(TYPE x, ADDRSPACE TYPE *cosval) {   
\
+    *cosval = __clc_cos(x);                                                    
\
+    return __clc_sin(x);                                                       
\
   }
 
 __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE)
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl 
b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
index 24676d3c7711c..c1768e39243fa 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
@@ -31,3 +31,27 @@
 #define __CLC_BODY <clc_sincos_helpers.inc>
 
 #include <clc/math/gentype.inc>
+
+#undef __FLOAT_ONLY
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#include <clc/math/clc_fract.h>
+#include <clc/math/tables.h>
+#include <clc/shared/clc_max.h>
+
+#define bytealign(src0, src1, src2)                                            
\
+  (__CLC_CONVERT_UINTN(                                                        
\
+      ((__CLC_CONVERT_LONGN((src0)) << 32) | __CLC_CONVERT_LONGN((src1))) >>   
\
+      (((src2) & 3) * 8)))
+
+#define __DOUBLE_ONLY
+#define __CLC_BODY <clc_sincos_helpers_fp64.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __DOUBLE_ONLY
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc 
b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
new file mode 100644
index 0000000000000..3a8edb378cae2
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
@@ -0,0 +1,234 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Reduction for medium sized arguments
+_CLC_DEF _CLC_OVERLOAD void
+__clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
+                             private __CLC_DOUBLEN *rr,
+                             private __CLC_INTN *regn) {
+  // How many pi/2 is x a multiple of?
+  const __CLC_DOUBLEN two_by_pi = 0x1.45f306dc9c883p-1;
+  __CLC_DOUBLEN dnpi2 = __clc_trunc(__clc_fma(x, two_by_pi, 0.5));
+
+  const __CLC_DOUBLEN piby2_h = -7074237752028440.0 / 0x1.0p+52;
+  const __CLC_DOUBLEN piby2_m = -2483878800010755.0 / 0x1.0p+105;
+  const __CLC_DOUBLEN piby2_t = -3956492004828932.0 / 0x1.0p+158;
+
+  // Compute product of npi2 with 159 bits of 2/pi
+  __CLC_DOUBLEN p_hh = piby2_h * dnpi2;
+  __CLC_DOUBLEN p_ht = __clc_fma(piby2_h, dnpi2, -p_hh);
+  __CLC_DOUBLEN p_mh = piby2_m * dnpi2;
+  __CLC_DOUBLEN p_mt = __clc_fma(piby2_m, dnpi2, -p_mh);
+  __CLC_DOUBLEN p_th = piby2_t * dnpi2;
+  __CLC_DOUBLEN p_tt = __clc_fma(piby2_t, dnpi2, -p_th);
+
+  // Reduce to 159 bits
+  __CLC_DOUBLEN ph = p_hh;
+  __CLC_DOUBLEN pm = p_ht + p_mh;
+  __CLC_DOUBLEN t = p_mh - (pm - p_ht);
+  __CLC_DOUBLEN pt = p_th + t + p_mt + p_tt;
+  t = ph + pm;
+  pm = pm - (t - ph);
+  ph = t;
+  t = pm + pt;
+  pt = pt - (t - pm);
+  pm = t;
+
+  // Subtract from x
+  t = x + ph;
+  __CLC_DOUBLEN qh = t + pm;
+  __CLC_DOUBLEN qt = pm - (qh - t) + pt;
+
+  *r = qh;
+  *rr = qt;
+  *regn = __CLC_CONVERT_INTN(__CLC_CONVERT_LONGN(dnpi2)) & 0x3;
+}
+
+// Given positive argument x, reduce it to the range [-pi/4,pi/4] using
+// extra precision, and return the result in r, rr.
+// Return value "regn" tells how many lots of pi/2 were subtracted
+// from x to put it in the range [-pi/4,pi/4], mod 4.
+_CLC_DEF _CLC_OVERLOAD void
+__clc_remainder_piby2_large(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
+                            private __CLC_DOUBLEN *rr,
+                            private __CLC_INTN *regn) {
+
+  __CLC_LONGN ux = __CLC_AS_LONGN(x);
+  __CLC_INTN e = __CLC_CONVERT_INTN(ux >> 52) - 1023;
+  __CLC_INTN i = __clc_max(23, (e >> 3) + 17);
+  __CLC_INTN j = 150 - i;
+  __CLC_INTN j16 = j & ~0xf;
+  __CLC_DOUBLEN fract_temp;
+
+  // The following extracts 192 consecutive bits of 2/pi aligned on an 
arbitrary
+  // byte boundary
+  __CLC_ULONGN q0 = USE_TABLE(pibits_tbl, j16);
+  __CLC_ULONGN q3 = USE_TABLE(pibits_tbl, (j16 + 8));
+  __CLC_ULONGN q1 = USE_TABLE(pibits_tbl, (j16 + 16));
+  __CLC_ULONGN q4 = USE_TABLE(pibits_tbl, (j16 + 24));
+  __CLC_ULONGN q2 = USE_TABLE(pibits_tbl, (j16 + 32));
+
+  __CLC_INTN k = (j >> 2) & 0x3;
+  __CLC_INTN c0 = k == 0;
+  __CLC_INTN c1 = k == 1;
+  __CLC_INTN c2 = k == 2;
+  __CLC_INTN c3 = k == 3;
+
+  __CLC_UINTN u0, u1, u2, u3, u4, u5, u6;
+
+  __CLC_UINTN q0s0 = __CLC_CONVERT_UINTN(q0);
+  __CLC_UINTN q0s1 = __CLC_CONVERT_UINTN(q0 >> 32);
+
+  __CLC_UINTN q1s0 = __CLC_CONVERT_UINTN(q1);
+  __CLC_UINTN q1s1 = __CLC_CONVERT_UINTN(q1 >> 32);
+
+  __CLC_UINTN q2s0 = __CLC_CONVERT_UINTN(q2);
+  __CLC_UINTN q2s1 = __CLC_CONVERT_UINTN(q2 >> 32);
+
+  __CLC_UINTN q3s0 = __CLC_CONVERT_UINTN(q3);
+  __CLC_UINTN q3s1 = __CLC_CONVERT_UINTN(q3 >> 32);
+
+  __CLC_UINTN q4s0 = __CLC_CONVERT_UINTN(q4);
+  __CLC_UINTN q4s1 = __CLC_CONVERT_UINTN(q4 >> 32);
+
+  u0 = c1 ? q0s1 : q0s0;
+  u0 = c2 ? q3s0 : u0;
+  u0 = c3 ? q3s1 : u0;
+
+  u1 = c1 ? q3s0 : q0s1;
+  u1 = c2 ? q3s1 : u1;
+  u1 = c3 ? q1s0 : u1;
+
+  u2 = c1 ? q3s1 : q3s0;
+  u2 = c2 ? q1s0 : u2;
+  u2 = c3 ? q1s1 : u2;
+
+  u3 = c1 ? q1s0 : q3s1;
+  u3 = c2 ? q1s1 : u3;
+  u3 = c3 ? q4s0 : u3;
+
+  u4 = c1 ? q1s1 : q1s0;
+  u4 = c2 ? q4s0 : u4;
+  u4 = c3 ? q4s1 : u4;
+
+  u5 = c1 ? q4s0 : q1s1;
+  u5 = c2 ? q4s1 : u5;
+  u5 = c3 ? q2s0 : u5;
+
+  u6 = c1 ? q4s1 : q4s0;
+  u6 = c2 ? q2s0 : u6;
+  u6 = c3 ? q2s1 : u6;
+
+  __CLC_UINTN v0 = bytealign(u1, u0, j);
+  __CLC_UINTN v1 = bytealign(u2, u1, j);
+  __CLC_UINTN v2 = bytealign(u3, u2, j);
+  __CLC_UINTN v3 = bytealign(u4, u3, j);
+  __CLC_UINTN v4 = bytealign(u5, u4, j);
+  __CLC_UINTN v5 = bytealign(u6, u5, j);
+
+  // Place those 192 bits in 4 48-bit doubles along with correct exponent
+  // If i > 1018 we would get subnormals so we scale p up and x down to get the
+  // same product
+  i = 2 + 8 * i;
+  x *= __CLC_CONVERT_BIT_INTN(i > 1018) ? 0x1.0p-136 : 1.0;
+  i -= i > 1018 ? 136 : 0;
+
+#define doublen_lohi(x, y)                                                     
\
+  __CLC_AS_DOUBLEN(__CLC_CONVERT_ULONGN((x)) & 0xFFFFFFFF |                    
\
+                   __CLC_CONVERT_ULONGN((y)) << 32)
+
+  __CLC_UINTN ua = __CLC_CONVERT_UINTN(1023 + 52 - i) << 20;
+  __CLC_DOUBLEN a = doublen_lohi((__CLC_ULONGN)0, ua);
+  __CLC_DOUBLEN p0 = doublen_lohi(v0, ua | (v1 & 0xffffU)) - a;
+  ua += 0x03000000U;
+  a = doublen_lohi((__CLC_ULONGN)0, ua);
+  __CLC_DOUBLEN p1 =
+      doublen_lohi(((v2 << 16) | (v1 >> 16)), (ua | (v2 >> 16))) - a;
+  ua += 0x03000000U;
+  a = doublen_lohi((__CLC_ULONGN)0, ua);
+  __CLC_DOUBLEN p2 = doublen_lohi(v3, (ua | (v4 & 0xffffU))) - a;
+  ua += 0x03000000U;
+  a = doublen_lohi((__CLC_ULONGN)0, ua);
+  __CLC_DOUBLEN p3 =
+      doublen_lohi(((v5 << 16) | (v4 >> 16)), (ua | (v5 >> 16))) - a;
+
+#undef doublen_lohi
+
+  // Exact multiply
+  __CLC_DOUBLEN f0h = p0 * x;
+  __CLC_DOUBLEN f0l = __clc_fma(p0, x, -f0h);
+  __CLC_DOUBLEN f1h = p1 * x;
+  __CLC_DOUBLEN f1l = __clc_fma(p1, x, -f1h);
+  __CLC_DOUBLEN f2h = p2 * x;
+  __CLC_DOUBLEN f2l = __clc_fma(p2, x, -f2h);
+  __CLC_DOUBLEN f3h = p3 * x;
+  __CLC_DOUBLEN f3l = __clc_fma(p3, x, -f3h);
+
+  // Accumulate product into 4 doubles
+  __CLC_DOUBLEN s, t;
+
+  __CLC_DOUBLEN f3 = f3h + f2h;
+  t = f2h - (f3 - f3h);
+  s = f3l + t;
+  t = t - (s - f3l);
+
+  __CLC_DOUBLEN f2 = s + f1h;
+  t = f1h - (f2 - s) + t;
+  s = f2l + t;
+  t = t - (s - f2l);
+
+  __CLC_DOUBLEN f1 = s + f0h;
+  t = f0h - (f1 - s) + t;
+  s = f1l + t;
+
+  __CLC_DOUBLEN f0 = s + f0l;
+
+  // Strip off unwanted large integer bits
+  f3 = 0x1.0p+10 * __clc_fract(f3 * 0x1.0p-10, &fract_temp);
+  f3 += f3 + f2 < 0.0 ? 0x1.0p+10 : 0.0;
+
+  // Compute least significant integer bits
+  t = f3 + f2;
+  __CLC_DOUBLEN di = t - __clc_fract(t, &fract_temp);
+  i = __CLC_CONVERT_INTN(__CLC_CONVERT_FLOATN(di));
+
+  // Shift out remaining integer part
+  f3 -= di;
+  s = f3 + f2;
+  t = f2 - (s - f3);
+  f3 = s;
+  f2 = t;
+  s = f2 + f1;
+  t = f1 - (s - f2);
+  f2 = s;
+  f1 = t;
+  f1 += f0;
+
+  // Subtract 1 if fraction is >= 0.5, and update regn
+  __CLC_INTN g = __CLC_CONVERT_INTN(f3 >= 0.5 ? 1L : 0L);
+  i += g;
+  f3 -= __CLC_CONVERT_DOUBLEN(__CLC_CONVERT_FLOATN(g));
+
+  // Shift up bits
+  s = f3 + f2;
+  t = f2 - (s - f3);
+  f3 = s;
+  f2 = t + f1;
+
+  // Multiply precise fraction by pi/2 to get radians
+  const __CLC_DOUBLEN p2h = 7074237752028440.0 / 0x1.0p+52;
+  const __CLC_DOUBLEN p2t = 4967757600021510.0 / 0x1.0p+106;
+
+  __CLC_DOUBLEN rhi = f3 * p2h;
+  __CLC_DOUBLEN rlo =
+      __clc_fma(f2, p2h, __clc_fma(f3, p2t, __clc_fma(f3, p2h, -rhi)));
+
+  *r = rhi + rlo;
+  *rr = rlo - (*r - rhi);
+  *regn = i & 0x3;
+}
diff --git a/libclc/clc/lib/generic/math/clc_tables.cl 
b/libclc/clc/lib/generic/math/clc_tables.cl
index 7ee744589ae1d..05053aea7bc12 100644
--- a/libclc/clc/lib/generic/math/clc_tables.cl
+++ b/libclc/clc/lib/generic/math/clc_tables.cl
@@ -1648,4 +1648,66 @@ CLC_TABLE_FUNCTION(double, SINH_TBL_TAIL, sinh_tbl_tail);
 CLC_TABLE_FUNCTION(double, COSH_TBL_HEAD, cosh_tbl_head);
 CLC_TABLE_FUNCTION(double, COSH_TBL_TAIL, cosh_tbl_tail);
 
+DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
+    224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
+    169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31,
+    235, 158, 212, 181, 168, 127, 121, 154, 253, 24, 61, 221, 38,
+    44, 159, 60, 251, 217, 180, 125, 180, 41, 104, 45, 70, 188,
+    188, 63, 96, 22, 120, 255, 95, 226, 127, 236, 160, 228, 247,
+    46, 126, 17, 114, 210, 231, 76, 13, 230, 88, 71, 230, 4, 249,
+    125, 209, 154, 192, 113, 166, 19, 18, 237, 186, 212, 215, 8,
+    162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, 39,
+    168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21,
+    239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109,
+    3, 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13,
+    230, 139, 2, 0, 0, 0, 0, 0, 0, 0
+};
+
+_CLC_DEF _CLC_OVERLOAD ulong TABLE_MANGLE(pibits_tbl)(int idx) {
+  return *(__constant ulong *)(PIBITS_TBL + idx);
+}
+_CLC_DEF _CLC_OVERLOAD ulong2 TABLE_MANGLE(pibits_tbl)(int2 idx) {
+  return (ulong2){*(__constant ulong *)(PIBITS_TBL + idx.s0),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s1)};
+}
+_CLC_DEF _CLC_OVERLOAD ulong3 TABLE_MANGLE(pibits_tbl)(int3 idx) {
+  return (ulong3){*(__constant ulong *)(PIBITS_TBL + idx.s0),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s1),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s2)};
+}
+_CLC_DEF _CLC_OVERLOAD ulong4 TABLE_MANGLE(pibits_tbl)(int4 idx) {
+  return (ulong4){*(__constant ulong *)(PIBITS_TBL + idx.s0),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s1),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s2),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s3)};
+}
+_CLC_DEF _CLC_OVERLOAD ulong8 TABLE_MANGLE(pibits_tbl)(int8 idx) {
+  return (ulong8){*(__constant ulong *)(PIBITS_TBL + idx.s0),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s1),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s2),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s3),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s4),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s5),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s6),
+                  *(__constant ulong *)(PIBITS_TBL + idx.s7)};
+}
+_CLC_DEF _CLC_OVERLOAD ulong16 TABLE_MANGLE(pibits_tbl)(int16 idx) {
+  return (ulong16){*(__constant ulong *)(PIBITS_TBL + idx.s0),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s1),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s2),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s3),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s4),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s5),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s6),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s7),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s8),
+                   *(__constant ulong *)(PIBITS_TBL + idx.s9),
+                   *(__constant ulong *)(PIBITS_TBL + idx.sA),
+                   *(__constant ulong *)(PIBITS_TBL + idx.sB),
+                   *(__constant ulong *)(PIBITS_TBL + idx.sC),
+                   *(__constant ulong *)(PIBITS_TBL + idx.sD),
+                   *(__constant ulong *)(PIBITS_TBL + idx.sE),
+                   *(__constant ulong *)(PIBITS_TBL + idx.sF)};
+}
+
 #endif // cl_khr_fp64
diff --git a/libclc/clspv/lib/SOURCES b/libclc/clspv/lib/SOURCES
index d2fea9d586287..437210d1922da 100644
--- a/libclc/clspv/lib/SOURCES
+++ b/libclc/clspv/lib/SOURCES
@@ -66,10 +66,8 @@ subnormal_config.cl
 ../../generic/lib/math/rootn.cl
 ../../generic/lib/math/sin.cl
 ../../generic/lib/math/sincos.cl
-../../generic/lib/math/sincos_helpers.cl
 ../../generic/lib/math/sinh.cl
 ../../generic/lib/math/sinpi.cl
-../../generic/lib/math/tables.cl
 ../../generic/lib/math/tan.cl
 ../../generic/lib/math/tanh.cl
 ../../generic/lib/math/tanpi.cl
diff --git a/libclc/generic/include/clc/math/sincos.h 
b/libclc/generic/include/clc/math/sincos.h
index fbd29c441e319..7426c23e88af9 100644
--- a/libclc/generic/include/clc/math/sincos.h
+++ b/libclc/generic/include/clc/math/sincos.h
@@ -6,5 +6,7 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#define __CLC_BODY <clc/math/sincos.inc>
+#define __CLC_BODY <clc/math/unary_decl_with_ptr.inc>
+#define __CLC_FUNCTION __clc_sincos
 #include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/math/sincos.inc 
b/libclc/generic/include/clc/math/sincos.inc
deleted file mode 100644
index d6ec2fe94704e..0000000000000
--- a/libclc/generic/include/clc/math/sincos.inc
+++ /dev/null
@@ -1,14 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos(__CLC_GENTYPE x,
-                                             global __CLC_GENTYPE *cosval);
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos(__CLC_GENTYPE x,
-                                             local __CLC_GENTYPE *cosval);
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos(__CLC_GENTYPE x,
-                                             private __CLC_GENTYPE *cosval);
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index 781aaf16a5244..8c7565e3dd231 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -131,7 +131,6 @@ math/native_rsqrt.cl
 math/native_sin.cl
 math/native_sqrt.cl
 math/native_tan.cl
-math/tables.cl
 math/nextafter.cl
 math/pow.cl
 math/pown.cl
@@ -144,7 +143,6 @@ math/round.cl
 math/rsqrt.cl
 math/sin.cl
 math/sincos.cl
-math/sincos_helpers.cl
 math/sinh.cl
 math/sinpi.cl
 math/sqrt.cl
diff --git a/libclc/generic/lib/math/clc_tan.cl 
b/libclc/generic/lib/math/clc_tan.cl
index 7e28e9ffed3b6..ce51c1031fa45 100644
--- a/libclc/generic/lib/math/clc_tan.cl
+++ b/libclc/generic/lib/math/clc_tan.cl
@@ -6,7 +6,6 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include "sincos_helpers.h"
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
 #include <clc/math/clc_fabs.h>
diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl
index 00ffa371ad8fc..5b97c6a6f379a 100644
--- a/libclc/generic/lib/math/cos.cl
+++ b/libclc/generic/lib/math/cos.cl
@@ -6,45 +6,9 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include "sincos_helpers.h"
 #include <clc/clc.h>
-#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
-#include <clc/math/clc_fabs.h>
-#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/math.h>
-#include <clc/relational/clc_isinf.h>
-#include <clc/relational/clc_isnan.h>
-#include <clc/relational/clc_select.h>
+#include <clc/math/clc_cos.h>
 
-// FP32 and FP16 versions.
-#define __CLC_BODY <cos.inc>
+#define FUNCTION cos
+#define __CLC_BODY <clc/shared/unary_def.inc>
 #include <clc/math/gentype.inc>
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double cos(double x) {
-    x = fabs(x);
-
-    double r, rr;
-    int regn;
-
-    if (x < 0x1.0p+47)
-        __clc_remainder_piby2_medium(x, &r, &rr, &regn);
-    else
-        __clc_remainder_piby2_large(x, &r, &rr, &regn);
-
-    double2 sc = __clc_sincos_piby4(r, rr);
-    sc.lo = -sc.lo;
-
-    int2 c = as_int2(regn & 1 ? sc.lo : sc.hi);
-    c.hi ^= (regn > 1) << 31;
-
-    return isnan(x) | isinf(x) ? as_double(QNANBITPATT_DP64) : as_double(c);
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double);
-
-#endif
diff --git a/libclc/generic/lib/math/cos.inc b/libclc/generic/lib/math/cos.inc
deleted file mode 100644
index 1db96710457dd..0000000000000
--- a/libclc/generic/lib/math/cos.inc
+++ /dev/null
@@ -1,34 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#if __CLC_FPSIZE == 32
-
-_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN cos(__CLC_FLOATN x) {
-  __CLC_FLOATN absx = __clc_fabs(x);
-
-  __CLC_FLOATN r0, r1;
-  __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx);
-
-  __CLC_FLOATN ss = -__clc_sinf_piby4(r0, r1);
-  __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1);
-
-  __CLC_FLOATN c = (regn & 1) != 0 ? ss : cc;
-  c = __CLC_AS_FLOATN(__CLC_AS_INTN(c) ^ ((regn > 1) << 31));
-
-  c = __clc_select(c, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x));
-
-  return c;
-}
-
-#elif __CLC_FPSIZE == 16
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE cos(__CLC_GENTYPE x) {
-  return __CLC_CONVERT_GENTYPE(cos(__CLC_CONVERT_FLOATN(x)));
-}
-
-#endif
diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl
index f776805ad1cdb..76728cfb1c5e5 100644
--- a/libclc/generic/lib/math/sin.cl
+++ b/libclc/generic/lib/math/sin.cl
@@ -6,44 +6,9 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include "sincos_helpers.h"
 #include <clc/clc.h>
-#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
-#include <clc/math/clc_fabs.h>
-#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/math.h>
-#include <clc/relational/clc_isinf.h>
-#include <clc/relational/clc_isnan.h>
-#include <clc/relational/clc_select.h>
+#include <clc/math/clc_sin.h>
 
-// FP32 and FP16 versions.
-#define __CLC_BODY <sin.inc>
+#define FUNCTION sin
+#define __CLC_BODY <clc/shared/unary_def.inc>
 #include <clc/math/gentype.inc>
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double sin(double x) {
-    double y = fabs(x);
-
-    double r, rr;
-    int regn;
-
-    if (y < 0x1.0p+47)
-        __clc_remainder_piby2_medium(y, &r, &rr, &regn);
-    else
-        __clc_remainder_piby2_large(y, &r, &rr, &regn);
-
-    double2 sc = __clc_sincos_piby4(r, rr);
-
-    int2 s = as_int2(regn & 1 ? sc.hi : sc.lo);
-    s.hi ^= ((regn > 1) << 31) ^ ((x < 0.0) << 31);
-
-    return  isinf(x) | isnan(x) ? as_double(QNANBITPATT_DP64) : as_double(s);
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double);
-
-#endif
diff --git a/libclc/generic/lib/math/sin.inc b/libclc/generic/lib/math/sin.inc
deleted file mode 100644
index dbc99116dfa7d..0000000000000
--- a/libclc/generic/lib/math/sin.inc
+++ /dev/null
@@ -1,38 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#if __CLC_FPSIZE == 32
-
-_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN sin(__CLC_FLOATN x) {
-  __CLC_FLOATN absx = __clc_fabs(x);
-
-  __CLC_FLOATN r0, r1;
-  __CLC_INTN regn = __clc_argReductionS(&r0, &r1, absx);
-
-  __CLC_FLOATN ss = __clc_sinf_piby4(r0, r1);
-  __CLC_FLOATN cc = __clc_cosf_piby4(r0, r1);
-
-  __CLC_FLOATN s = (regn & 1) != 0 ? cc : ss;
-  s = __CLC_AS_FLOATN(__CLC_AS_INTN(s) ^ ((regn > 1) << 31) ^
-                      (__CLC_AS_INTN(x) ^ __CLC_AS_INTN(absx)));
-
-  s = __clc_select(s, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isinf(x));
-
-  // Subnormals
-  s = x == 0.0f ? x : s;
-
-  return s;
-}
-
-#elif __CLC_FPSIZE == 16
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE sin(__CLC_GENTYPE x) {
-  return __CLC_CONVERT_GENTYPE(sin(__CLC_CONVERT_FLOATN(x)));
-}
-
-#endif
diff --git a/libclc/generic/lib/math/sincos.cl 
b/libclc/generic/lib/math/sincos.cl
index fc0e4d350c122..25e620cf2a38e 100644
--- a/libclc/generic/lib/math/sincos.cl
+++ b/libclc/generic/lib/math/sincos.cl
@@ -7,6 +7,8 @@
 
//===----------------------------------------------------------------------===//
 
 #include <clc/clc.h>
+#include <clc/math/clc_sincos.h>
 
-#define __CLC_BODY <sincos.inc>
+#define FUNCTION sincos
+#define __CLC_BODY <clc/math/unary_def_with_ptr.inc>
 #include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/sincos_helpers.cl 
b/libclc/generic/lib/math/sincos_helpers.cl
deleted file mode 100644
index 651cd11ccf016..0000000000000
--- a/libclc/generic/lib/math/sincos_helpers.cl
+++ /dev/null
@@ -1,285 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "sincos_helpers.h"
-#include <clc/clc.h>
-#include <clc/integer/clc_clz.h>
-#include <clc/integer/clc_mul_hi.h>
-#include <clc/math/clc_fma.h>
-#include <clc/math/clc_mad.h>
-#include <clc/math/clc_trunc.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
-#include <clc/shared/clc_max.h>
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define bytealign(src0, src1, src2)                                            
\
-  ((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8)))
-
-// Reduction for medium sized arguments
-_CLC_DEF void __clc_remainder_piby2_medium(double x, private double *r,
-                                           private double *rr,
-                                           private int *regn) {
-  // How many pi/2 is x a multiple of?
-  const double two_by_pi = 0x1.45f306dc9c883p-1;
-  double dnpi2 = __clc_trunc(__clc_fma(x, two_by_pi, 0.5));
-
-  const double piby2_h = -7074237752028440.0 / 0x1.0p+52;
-  const double piby2_m = -2483878800010755.0 / 0x1.0p+105;
-  const double piby2_t = -3956492004828932.0 / 0x1.0p+158;
-
-  // Compute product of npi2 with 159 bits of 2/pi
-  double p_hh = piby2_h * dnpi2;
-  double p_ht = __clc_fma(piby2_h, dnpi2, -p_hh);
-  double p_mh = piby2_m * dnpi2;
-  double p_mt = __clc_fma(piby2_m, dnpi2, -p_mh);
-  double p_th = piby2_t * dnpi2;
-  double p_tt = __clc_fma(piby2_t, dnpi2, -p_th);
-
-  // Reduce to 159 bits
-  double ph = p_hh;
-  double pm = p_ht + p_mh;
-  double t = p_mh - (pm - p_ht);
-  double pt = p_th + t + p_mt + p_tt;
-  t = ph + pm;
-  pm = pm - (t - ph);
-  ph = t;
-  t = pm + pt;
-  pt = pt - (t - pm);
-  pm = t;
-
-  // Subtract from x
-  t = x + ph;
-  double qh = t + pm;
-  double qt = pm - (qh - t) + pt;
-
-  *r = qh;
-  *rr = qt;
-  *regn = (int)(long)dnpi2 & 0x3;
-}
-
-// Given positive argument x, reduce it to the range [-pi/4,pi/4] using
-// extra precision, and return the result in r, rr.
-// Return value "regn" tells how many lots of pi/2 were subtracted
-// from x to put it in the range [-pi/4,pi/4], mod 4.
-
-_CLC_DEF void __clc_remainder_piby2_large(double x, private double *r,
-                                          private double *rr,
-                                          private int *regn) {
-
-  long ux = as_long(x);
-  int e = (int)(ux >> 52) - 1023;
-  int i = __clc_max(23, (e >> 3) + 17);
-  int j = 150 - i;
-  int j16 = j & ~0xf;
-  double fract_temp;
-
-  // The following extracts 192 consecutive bits of 2/pi aligned on an 
arbitrary
-  // byte boundary
-  uint4 q0 = USE_TABLE(pibits_tbl, j16);
-  uint4 q1 = USE_TABLE(pibits_tbl, (j16 + 16));
-  uint4 q2 = USE_TABLE(pibits_tbl, (j16 + 32));
-
-  int k = (j >> 2) & 0x3;
-  int4 c = (int4)k == (int4)(0, 1, 2, 3);
-
-  uint u0, u1, u2, u3, u4, u5, u6;
-
-  u0 = c.s1 ? q0.s1 : q0.s0;
-  u0 = c.s2 ? q0.s2 : u0;
-  u0 = c.s3 ? q0.s3 : u0;
-
-  u1 = c.s1 ? q0.s2 : q0.s1;
-  u1 = c.s2 ? q0.s3 : u1;
-  u1 = c.s3 ? q1.s0 : u1;
-
-  u2 = c.s1 ? q0.s3 : q0.s2;
-  u2 = c.s2 ? q1.s0 : u2;
-  u2 = c.s3 ? q1.s1 : u2;
-
-  u3 = c.s1 ? q1.s0 : q0.s3;
-  u3 = c.s2 ? q1.s1 : u3;
-  u3 = c.s3 ? q1.s2 : u3;
-
-  u4 = c.s1 ? q1.s1 : q1.s0;
-  u4 = c.s2 ? q1.s2 : u4;
-  u4 = c.s3 ? q1.s3 : u4;
-
-  u5 = c.s1 ? q1.s2 : q1.s1;
-  u5 = c.s2 ? q1.s3 : u5;
-  u5 = c.s3 ? q2.s0 : u5;
-
-  u6 = c.s1 ? q1.s3 : q1.s2;
-  u6 = c.s2 ? q2.s0 : u6;
-  u6 = c.s3 ? q2.s1 : u6;
-
-  uint v0 = bytealign(u1, u0, j);
-  uint v1 = bytealign(u2, u1, j);
-  uint v2 = bytealign(u3, u2, j);
-  uint v3 = bytealign(u4, u3, j);
-  uint v4 = bytealign(u5, u4, j);
-  uint v5 = bytealign(u6, u5, j);
-
-  // Place those 192 bits in 4 48-bit doubles along with correct exponent
-  // If i > 1018 we would get subnormals so we scale p up and x down to get the
-  // same product
-  i = 2 + 8 * i;
-  x *= i > 1018 ? 0x1.0p-136 : 1.0;
-  i -= i > 1018 ? 136 : 0;
-
-  uint ua = (uint)(1023 + 52 - i) << 20;
-  double a = as_double((uint2)(0, ua));
-  double p0 = as_double((uint2)(v0, ua | (v1 & 0xffffU))) - a;
-  ua += 0x03000000U;
-  a = as_double((uint2)(0, ua));
-  double p1 = as_double((uint2)((v2 << 16) | (v1 >> 16), ua | (v2 >> 16))) - a;
-  ua += 0x03000000U;
-  a = as_double((uint2)(0, ua));
-  double p2 = as_double((uint2)(v3, ua | (v4 & 0xffffU))) - a;
-  ua += 0x03000000U;
-  a = as_double((uint2)(0, ua));
-  double p3 = as_double((uint2)((v5 << 16) | (v4 >> 16), ua | (v5 >> 16))) - a;
-
-  // Exact multiply
-  double f0h = p0 * x;
-  double f0l = __clc_fma(p0, x, -f0h);
-  double f1h = p1 * x;
-  double f1l = __clc_fma(p1, x, -f1h);
-  double f2h = p2 * x;
-  double f2l = __clc_fma(p2, x, -f2h);
-  double f3h = p3 * x;
-  double f3l = __clc_fma(p3, x, -f3h);
-
-  // Accumulate product into 4 doubles
-  double s, t;
-
-  double f3 = f3h + f2h;
-  t = f2h - (f3 - f3h);
-  s = f3l + t;
-  t = t - (s - f3l);
-
-  double f2 = s + f1h;
-  t = f1h - (f2 - s) + t;
-  s = f2l + t;
-  t = t - (s - f2l);
-
-  double f1 = s + f0h;
-  t = f0h - (f1 - s) + t;
-  s = f1l + t;
-
-  double f0 = s + f0l;
-
-  // Strip off unwanted large integer bits
-  f3 = 0x1.0p+10 * fract(f3 * 0x1.0p-10, &fract_temp);
-  f3 += f3 + f2 < 0.0 ? 0x1.0p+10 : 0.0;
-
-  // Compute least significant integer bits
-  t = f3 + f2;
-  double di = t - fract(t, &fract_temp);
-  i = (float)di;
-
-  // Shift out remaining integer part
-  f3 -= di;
-  s = f3 + f2;
-  t = f2 - (s - f3);
-  f3 = s;
-  f2 = t;
-  s = f2 + f1;
-  t = f1 - (s - f2);
-  f2 = s;
-  f1 = t;
-  f1 += f0;
-
-  // Subtract 1 if fraction is >= 0.5, and update regn
-  int g = f3 >= 0.5;
-  i += g;
-  f3 -= (float)g;
-
-  // Shift up bits
-  s = f3 + f2;
-  t = f2 - (s - f3);
-  f3 = s;
-  f2 = t + f1;
-
-  // Multiply precise fraction by pi/2 to get radians
-  const double p2h = 7074237752028440.0 / 0x1.0p+52;
-  const double p2t = 4967757600021510.0 / 0x1.0p+106;
-
-  double rhi = f3 * p2h;
-  double rlo = __clc_fma(f2, p2h, __clc_fma(f3, p2t, __clc_fma(f3, p2h, 
-rhi)));
-
-  *r = rhi + rlo;
-  *rr = rlo - (*r - rhi);
-  *regn = i & 0x3;
-}
-
-_CLC_DEF double2 __clc_sincos_piby4(double x, double xx) {
-  // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
-  //                      = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
-  //                      = x * f(w)
-  // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
-  // We use a minimax approximation of (f(w) - 1) / w
-  // because this produces an expansion in even powers of x.
-  // If xx (the tail of x) is non-zero, we add a correction
-  // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
-  // is an approximation to cos(x)*sin(xx) valid because
-  // xx is tiny relative to x.
-
-  // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
-  //                      = f(w)
-  // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
-  // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
-  // because this produces an expansion in even powers of x.
-  // If xx (the tail of x) is non-zero, we subtract a correction
-  // term g(x,xx) = x*xx to the result, where g(x,xx)
-  // is an approximation to sin(x)*sin(xx) valid because
-  // xx is tiny relative to x.
-
-  const double sc1 = -0.166666666666666646259241729;
-  const double sc2 = 0.833333333333095043065222816e-2;
-  const double sc3 = -0.19841269836761125688538679e-3;
-  const double sc4 = 0.275573161037288022676895908448e-5;
-  const double sc5 = -0.25051132068021699772257377197e-7;
-  const double sc6 = 0.159181443044859136852668200e-9;
-
-  const double cc1 = 0.41666666666666665390037e-1;
-  const double cc2 = -0.13888888888887398280412e-2;
-  const double cc3 = 0.248015872987670414957399e-4;
-  const double cc4 = -0.275573172723441909470836e-6;
-  const double cc5 = 0.208761463822329611076335e-8;
-  const double cc6 = -0.113826398067944859590880e-10;
-
-  double x2 = x * x;
-  double x3 = x2 * x;
-  double r = 0.5 * x2;
-  double t = 1.0 - r;
-
-  double sp = __clc_fma(
-      __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, 
sc2);
-
-  double cp =
-      t +
-      __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, 
cc5),
-                                                        x2, cc4),
-                                              x2, cc3),
-                                    x2, cc2),
-                          x2, cc1),
-                x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
-
-  double2 ret;
-  ret.lo =
-      x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, 
-xx));
-  ret.hi = cp;
-
-  return ret;
-}
-
-#endif
diff --git a/libclc/generic/lib/math/sincos_helpers.h 
b/libclc/generic/lib/math/sincos_helpers.h
deleted file mode 100644
index 11cb93f34850d..0000000000000
--- a/libclc/generic/lib/math/sincos_helpers.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcfunc.h>
-#include <clc/clctypes.h>
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DECL void __clc_remainder_piby2_medium(double x, private double *r,
-                                            private double *rr,
-                                            private int *regn);
-_CLC_DECL void __clc_remainder_piby2_large(double x, private double *r,
-                                           private double *rr,
-                                           private int *regn);
-_CLC_DECL double2 __clc_sincos_piby4(double x, double xx);
-
-#endif
diff --git a/libclc/generic/lib/math/tables.cl 
b/libclc/generic/lib/math/tables.cl
deleted file mode 100644
index 1bda480a9fa72..0000000000000
--- a/libclc/generic/lib/math/tables.cl
+++ /dev/null
@@ -1,30 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clc.h>
-
-#include <clc/math/tables.h>
-
-DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
-    224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
-    169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31,
-    235, 158, 212, 181, 168, 127, 121, 154, 253, 24, 61, 221, 38,
-    44, 159, 60, 251, 217, 180, 125, 180, 41, 104, 45, 70, 188,
-    188, 63, 96, 22, 120, 255, 95, 226, 127, 236, 160, 228, 247,
-    46, 126, 17, 114, 210, 231, 76, 13, 230, 88, 71, 230, 4, 249,
-    125, 209, 154, 192, 113, 166, 19, 18, 237, 186, 212, 215, 8,
-    162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, 39,
-    168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21,
-    239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109,
-    3, 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13,
-    230, 139, 2, 0, 0, 0, 0, 0, 0, 0
-};
-
-uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
-    return *(__constant uint4 *)(PIBITS_TBL + idx);
-}
diff --git a/libclc/spirv/lib/SOURCES b/libclc/spirv/lib/SOURCES
index 5446fe13a6d93..20e052253d977 100644
--- a/libclc/spirv/lib/SOURCES
+++ b/libclc/spirv/lib/SOURCES
@@ -55,7 +55,6 @@ math/fma.cl
 ../../generic/lib/math/log2.cl
 ../../generic/lib/math/logb.cl
 ../../generic/lib/math/modf.cl
-../../generic/lib/math/tables.cl
 ../../generic/lib/math/pow.cl
 ../../generic/lib/math/pown.cl
 ../../generic/lib/math/powr.cl
@@ -64,7 +63,6 @@ math/fma.cl
 ../../generic/lib/math/rootn.cl
 ../../generic/lib/math/sin.cl
 ../../generic/lib/math/sincos.cl
-../../generic/lib/math/sincos_helpers.cl
 ../../generic/lib/math/sinh.cl
 ../../generic/lib/math/sinpi.cl
 ../../generic/lib/math/clc_tan.cl

>From 212ae34f61507cc71dcd20f21af7a02e8e62593a Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Thu, 1 May 2025 17:48:20 +0100
Subject: [PATCH 2/2] update table vars

---
 .../generic/math/clc_sincos_helpers_fp64.inc  | 69 ++++++++++---------
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc 
b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
index 3a8edb378cae2..9b5776d9b05e9 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
@@ -59,7 +59,7 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private 
__CLC_DOUBLEN *r,
                             private __CLC_INTN *regn) {
 
   __CLC_LONGN ux = __CLC_AS_LONGN(x);
-  __CLC_INTN e = __CLC_CONVERT_INTN(ux >> 52) - 1023;
+  __CLC_INTN e = __CLC_CONVERT_INTN(ux >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
   __CLC_INTN i = __clc_max(23, (e >> 3) + 17);
   __CLC_INTN j = 150 - i;
   __CLC_INTN j16 = j & ~0xf;
@@ -68,18 +68,10 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private 
__CLC_DOUBLEN *r,
   // The following extracts 192 consecutive bits of 2/pi aligned on an 
arbitrary
   // byte boundary
   __CLC_ULONGN q0 = USE_TABLE(pibits_tbl, j16);
-  __CLC_ULONGN q3 = USE_TABLE(pibits_tbl, (j16 + 8));
-  __CLC_ULONGN q1 = USE_TABLE(pibits_tbl, (j16 + 16));
-  __CLC_ULONGN q4 = USE_TABLE(pibits_tbl, (j16 + 24));
-  __CLC_ULONGN q2 = USE_TABLE(pibits_tbl, (j16 + 32));
-
-  __CLC_INTN k = (j >> 2) & 0x3;
-  __CLC_INTN c0 = k == 0;
-  __CLC_INTN c1 = k == 1;
-  __CLC_INTN c2 = k == 2;
-  __CLC_INTN c3 = k == 3;
-
-  __CLC_UINTN u0, u1, u2, u3, u4, u5, u6;
+  __CLC_ULONGN q1 = USE_TABLE(pibits_tbl, (j16 + 8));
+  __CLC_ULONGN q2 = USE_TABLE(pibits_tbl, (j16 + 16));
+  __CLC_ULONGN q3 = USE_TABLE(pibits_tbl, (j16 + 24));
+  __CLC_ULONGN q4 = USE_TABLE(pibits_tbl, (j16 + 32));
 
   __CLC_UINTN q0s0 = __CLC_CONVERT_UINTN(q0);
   __CLC_UINTN q0s1 = __CLC_CONVERT_UINTN(q0 >> 32);
@@ -96,33 +88,41 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private 
__CLC_DOUBLEN *r,
   __CLC_UINTN q4s0 = __CLC_CONVERT_UINTN(q4);
   __CLC_UINTN q4s1 = __CLC_CONVERT_UINTN(q4 >> 32);
 
+  __CLC_INTN k = (j >> 2) & 0x3;
+  __CLC_INTN c0 = k == 0;
+  __CLC_INTN c1 = k == 1;
+  __CLC_INTN c2 = k == 2;
+  __CLC_INTN c3 = k == 3;
+
+  __CLC_UINTN u0, u1, u2, u3, u4, u5, u6;
+
   u0 = c1 ? q0s1 : q0s0;
-  u0 = c2 ? q3s0 : u0;
-  u0 = c3 ? q3s1 : u0;
+  u0 = c2 ? q1s0 : u0;
+  u0 = c3 ? q1s1 : u0;
 
-  u1 = c1 ? q3s0 : q0s1;
-  u1 = c2 ? q3s1 : u1;
-  u1 = c3 ? q1s0 : u1;
+  u1 = c1 ? q1s0 : q0s1;
+  u1 = c2 ? q1s1 : u1;
+  u1 = c3 ? q2s0 : u1;
 
-  u2 = c1 ? q3s1 : q3s0;
-  u2 = c2 ? q1s0 : u2;
-  u2 = c3 ? q1s1 : u2;
+  u2 = c1 ? q1s1 : q1s0;
+  u2 = c2 ? q2s0 : u2;
+  u2 = c3 ? q2s1 : u2;
 
-  u3 = c1 ? q1s0 : q3s1;
-  u3 = c2 ? q1s1 : u3;
-  u3 = c3 ? q4s0 : u3;
+  u3 = c1 ? q2s0 : q1s1;
+  u3 = c2 ? q2s1 : u3;
+  u3 = c3 ? q3s0 : u3;
 
-  u4 = c1 ? q1s1 : q1s0;
-  u4 = c2 ? q4s0 : u4;
-  u4 = c3 ? q4s1 : u4;
+  u4 = c1 ? q2s1 : q2s0;
+  u4 = c2 ? q3s0 : u4;
+  u4 = c3 ? q3s1 : u4;
 
-  u5 = c1 ? q4s0 : q1s1;
-  u5 = c2 ? q4s1 : u5;
-  u5 = c3 ? q2s0 : u5;
+  u5 = c1 ? q3s0 : q2s1;
+  u5 = c2 ? q3s1 : u5;
+  u5 = c3 ? q4s0 : u5;
 
-  u6 = c1 ? q4s1 : q4s0;
-  u6 = c2 ? q2s0 : u6;
-  u6 = c3 ? q2s1 : u6;
+  u6 = c1 ? q3s1 : q3s0;
+  u6 = c2 ? q4s0 : u6;
+  u6 = c3 ? q4s1 : u6;
 
   __CLC_UINTN v0 = bytealign(u1, u0, j);
   __CLC_UINTN v1 = bytealign(u2, u1, j);
@@ -142,7 +142,8 @@ __clc_remainder_piby2_large(__CLC_DOUBLEN x, private 
__CLC_DOUBLEN *r,
   __CLC_AS_DOUBLEN(__CLC_CONVERT_ULONGN((x)) & 0xFFFFFFFF |                    
\
                    __CLC_CONVERT_ULONGN((y)) << 32)
 
-  __CLC_UINTN ua = __CLC_CONVERT_UINTN(1023 + 52 - i) << 20;
+  __CLC_UINTN ua = __CLC_CONVERT_UINTN(EXPBIAS_DP64 + EXPSHIFTBITS_DP64 - i)
+                   << 20;
   __CLC_DOUBLEN a = doublen_lohi((__CLC_ULONGN)0, ua);
   __CLC_DOUBLEN p0 = doublen_lohi(v0, ua | (v1 & 0xffffU)) - a;
   ua += 0x03000000U;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to