https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/186047

>From 25baf98ea916bb255581d20cdc217cf076b25bcd Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Wed, 11 Mar 2026 08:41:22 +0100
Subject: [PATCH] libclc: Add ep utility

Add utility for compensated arithmetic, which should be used
by a number of the large functions.
---
 libclc/clc/include/clc/math/clc_ep.h   |  21 ++
 libclc/clc/include/clc/math/clc_ep.inc | 131 +++++++++
 libclc/clc/lib/generic/CMakeLists.txt  |   1 +
 libclc/clc/lib/generic/math/clc_ep.cl  |  34 +++
 libclc/clc/lib/generic/math/clc_ep.inc | 391 +++++++++++++++++++++++++
 5 files changed, 578 insertions(+)
 create mode 100644 libclc/clc/include/clc/math/clc_ep.h
 create mode 100644 libclc/clc/include/clc/math/clc_ep.inc
 create mode 100644 libclc/clc/lib/generic/math/clc_ep.cl
 create mode 100644 libclc/clc/lib/generic/math/clc_ep.inc

diff --git a/libclc/clc/include/clc/math/clc_ep.h b/libclc/clc/include/clc/math/clc_ep.h
new file mode 100644
index 0000000000000..1834ba05d7e50
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_ep.h
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions implementing compensated arithmetic.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_EP_H__
+#define __CLC_MATH_CLC_EP_H__
+
+#include "clc/internal/clc.h"
+
+#define __CLC_BODY <clc/math/clc_ep.inc>
+#include <clc/math/gentype.inc>
+
+#endif // __CLC_MATH_CLC_EP_H__
diff --git a/libclc/clc/include/clc/math/clc_ep.inc b/libclc/clc/include/clc/math/clc_ep.inc
new file mode 100644
index 0000000000000..81bd77978dbab
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_ep.inc
@@ -0,0 +1,131 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __CLC_SCALAR
+
+#define __CLC_EP_PAIR __CLC_XCONCAT(__CLC_GENTYPE, 2) // 2-vector of the scalar type; read as .hi / .lo
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_absv(__CLC_EP_PAIR a);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_conditional_sign_match(__CLC_EP_PAIR a, __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_make_pair(__CLC_GENTYPE a, __CLC_GENTYPE b); // a -> .hi, b -> .lo
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_GENTYPE a, __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_normalize(__CLC_EP_PAIR a);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_normalize_overflow(__CLC_EP_PAIR a);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_GENTYPE a, __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_GENTYPE a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_GENTYPE a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_GENTYPE a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqr(__CLC_GENTYPE a);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_EP_PAIR a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_EP_PAIR a, __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_GENTYPE a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_GENTYPE a, __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_EP_PAIR a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_EP_PAIR a, __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_EP_PAIR a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_EP_PAIR a, __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_GENTYPE a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_GENTYPE a, __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_EP_PAIR a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_EP_PAIR a, __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ldexp(__CLC_EP_PAIR a,
+                                                                int e);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_EP_PAIR a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_mul_overflow(__CLC_EP_PAIR a, __CLC_GENTYPE b); // product finished with __clc_ep_normalize_overflow
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_GENTYPE a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_mul_overflow(__CLC_GENTYPE a, __CLC_EP_PAIR b); // product finished with __clc_ep_normalize_overflow
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_EP_PAIR a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_mul_overflow(__CLC_EP_PAIR a, __CLC_EP_PAIR b); // product finished with __clc_ep_normalize_overflow
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_GENTYPE a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_EP_PAIR a,
+                                                              __CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_GENTYPE a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_div(__CLC_EP_PAIR a, __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_EP_PAIR a,
+                                                              __CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_recip(__CLC_GENTYPE b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_recip(__CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_recip(__CLC_EP_PAIR b);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqr(__CLC_EP_PAIR a);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_GENTYPE a);
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_EP_PAIR a);
+
+#endif
diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt
index f9e3c91817cd2..1be28882ddf82 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -74,6 +74,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   math/clc_cospi.cl
   math/clc_div_cr.cl
   math/clc_div_fast.cl
+  math/clc_ep.cl
   math/clc_ep_log.cl
   math/clc_erf.cl
   math/clc_erfc.cl
diff --git a/libclc/clc/lib/generic/math/clc_ep.cl b/libclc/clc/lib/generic/math/clc_ep.cl
new file mode 100644
index 0000000000000..a85107bc56b71
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_ep.cl
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/clc_convert.h"
+#include "clc/math/clc_div_fast.h"
+#include "clc/math/clc_ep.h"
+#include "clc/math/clc_fma.h"
+#include "clc/math/clc_ldexp.h"
+#include "clc/math/clc_recip_fast.h"
+#include "clc/math/clc_sqrt_fast.h"
+#include "clc/relational/clc_isinf.h"
+#include "clc/relational/clc_signbit.h"
+
+#ifdef cl_khr_fp16 // half overload exists only when the fp16 extension macro is defined
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST static half ep_high_fp_bits(half x) { // file-local helper
+  return __clc_as_half((ushort)(__clc_as_ushort(x) & (ushort)0xffc0U)); // clear the low 6 bits of the encoding
+}
+#endif
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST static float ep_high_fp_bits(float x) { // file-local helper
+  return __clc_as_float(__clc_as_uint(x) & 0xfffff000U); // clear the low 12 bits of the encoding
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST static double ep_high_fp_bits(double x) { // file-local helper
+  return __clc_as_double(__clc_as_ulong(x) & 0xfffffffff8000000UL); // clear the low 27 bits of the encoding
+}
+
+#define __CLC_BODY <clc_ep.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_ep.inc b/libclc/clc/lib/generic/math/clc_ep.inc
new file mode 100644
index 0000000000000..de2d4f079ac8b
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_ep.inc
@@ -0,0 +1,391 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __CLC_SCALAR
+
+#if __CLC_FPSIZE == 16 // half: always take the fma branch
+#define CLC_EP_USE_FMA true
+#elif __CLC_FPSIZE == 32 // float: fma branch only when FP_FAST_FMAF is defined
+#if defined(FP_FAST_FMAF)
+#define CLC_EP_USE_FMA true
+#else
+#define CLC_EP_USE_FMA false
+#endif
+
+#elif __CLC_FPSIZE == 64 // double: fma branch only when FP_FAST_FMA is defined
+#if defined(FP_FAST_FMA)
+#define CLC_EP_USE_FMA true
+#else
+#define CLC_EP_USE_FMA false
+#endif
+#endif
+
+#pragma OPENCL FP_CONTRACT OFF // presumably so the error terms below are not fused into other operations
+
+_CLC_OVERLOAD
+static bool samesign(__CLC_GENTYPE x, __CLC_GENTYPE y) { // file-local helper
+  return __clc_signbit(x) == __clc_signbit(y); // true when both sign bits agree
+}
+
+_CLC_DEF
+_CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_absv(__CLC_EP_PAIR a) {
+  return __clc_signbit(a.hi) ? -a : a; // negate both halves when hi has its sign bit set
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_conditional_sign_match(__CLC_EP_PAIR a, __CLC_EP_PAIR b) {
+  return samesign(a.hi, b.hi) ? a : -a; // negate a when the sign bits of a.hi and b.hi differ
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_make_pair(__CLC_GENTYPE a, __CLC_GENTYPE b) {
+  return (__CLC_EP_PAIR)(b, a); // vector is built as (lo, hi): a is read back as .hi, b as .lo
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_GENTYPE a, __CLC_GENTYPE b) { // NOTE(review): looks like it assumes |a| >= |b| - confirm
+  __CLC_GENTYPE s = a + b; // rounded sum becomes hi
+  return __clc_ep_make_pair(s, b - (s - a)); // lo = the part of b that s did not absorb
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_normalize(__CLC_EP_PAIR a) {
+  return __clc_ep_fast_add(a.hi, a.lo); // re-split hi + lo into a fresh (sum, error) pair
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_normalize_overflow(__CLC_EP_PAIR a) {
+  __CLC_GENTYPE s = a.hi + a.lo; // candidate hi
+  __CLC_GENTYPE t = a.lo - (s - a.hi); // candidate lo, same formula as __clc_ep_fast_add
+  s = __clc_isinf(a.hi) ? a.hi : s; // an infinite input hi is passed through unchanged
+  return __clc_ep_make_pair(s, __clc_isinf(s) ? __CLC_FP_LIT(0.0) : t); // lo is forced to 0 when hi is infinite
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_GENTYPE a, __CLC_GENTYPE b) { // NOTE(review): looks like it assumes |a| >= |b| - confirm
+  __CLC_GENTYPE d = a - b; // rounded difference becomes hi
+  return __clc_ep_make_pair(d, (a - d) - b); // lo = the part of b that d did not absorb
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_GENTYPE a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_GENTYPE s = a + b; // rounded sum becomes hi
+  __CLC_GENTYPE d = s - a; // portion of s attributed to b
+  return __clc_ep_make_pair(s, (a - (s - d)) + (b - d)); // lo = residual of a plus residual of b
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_GENTYPE a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_GENTYPE d = a - b; // rounded difference becomes hi
+  __CLC_GENTYPE e = d - a; // portion of d attributed to -b
+  return __clc_ep_make_pair(d, (a - (d - e)) - (b + e)); // lo = residual of a minus residual of b
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_GENTYPE a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_GENTYPE p = a * b; // rounded product becomes hi
+  if (CLC_EP_USE_FMA) {
+    return __clc_ep_make_pair(p, __clc_fma(a, b, -p)); // lo = a*b - p in a single fma
+  }
+
+  __CLC_GENTYPE ah = ep_high_fp_bits(a); // a with its low encoding bits masked off
+  __CLC_GENTYPE al = a - ah; // remainder of a
+  __CLC_GENTYPE bh = ep_high_fp_bits(b); // same split for b
+  __CLC_GENTYPE bl = b - bh;
+  return __clc_ep_make_pair(p, ((ah * bh - p) + ah * bl + al * bh) + al * bl); // lo = sum of partial products minus p
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqr(__CLC_GENTYPE a) {
+  __CLC_GENTYPE p = a * a; // rounded square becomes hi
+  if (CLC_EP_USE_FMA)
+    return __clc_ep_make_pair(p, __clc_fma(a, a, -p)); // lo = a*a - p in a single fma
+
+  __CLC_GENTYPE ah = ep_high_fp_bits(a); // a with its low encoding bits masked off
+  __CLC_GENTYPE al = a - ah; // remainder of a
+  return __clc_ep_make_pair(p, ((ah * ah - p) + __CLC_FP_LIT(2.0) * ah * al) + // lo = ah^2 - p + 2*ah*al + al^2
+                                   al * al);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_EP_PAIR a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_EP_PAIR s = __clc_ep_add(a.hi, b); // scalar + scalar overload on the high part
+  s.lo += a.lo; // fold a's low part into the error term
+  return __clc_ep_normalize(s);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_EP_PAIR a, __CLC_GENTYPE b) {
+  __CLC_EP_PAIR s = __clc_ep_fast_add(a.hi, b); // scalar fast_add on the high part
+  s.lo += a.lo; // fold a's low part into the error term
+  return __clc_ep_normalize(s);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_GENTYPE a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR s = __clc_ep_add(a, b.hi); // scalar + scalar overload on the high part
+  s.lo += b.lo; // fold b's low part into the error term
+  return __clc_ep_normalize(s);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_GENTYPE a, __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR s = __clc_ep_fast_add(a, b.hi); // scalar fast_add on the high part
+  s.lo += b.lo; // fold b's low part into the error term
+  return __clc_ep_normalize(s);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_add(__CLC_EP_PAIR a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR s = __clc_ep_add(a.hi, b.hi); // sum of the high parts with its error
+  __CLC_EP_PAIR t = __clc_ep_add(a.lo, b.lo); // sum of the low parts with its error
+  s.lo += t.hi; // merge the low-part sum first
+  s = __clc_ep_normalize(s);
+  s.lo += t.lo; // then the low-part error, after an intermediate renormalization
+  return __clc_ep_normalize(s);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_add(__CLC_EP_PAIR a, __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR s = __clc_ep_fast_add(a.hi, b.hi); // scalar fast_add on the high parts
+  s.lo += a.lo + b.lo; // both low parts are added with plain rounding, no error tracking
+  return __clc_ep_normalize(s);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_EP_PAIR a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_EP_PAIR d = __clc_ep_sub(a.hi, b); // scalar - scalar overload on the high part
+  d.lo += a.lo; // fold a's low part into the error term
+  return __clc_ep_normalize(d);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_EP_PAIR a, __CLC_GENTYPE b) {
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(a.hi, b); // scalar fast_sub on the high part
+  d.lo += a.lo; // fold a's low part into the error term
+  return __clc_ep_normalize(d);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_GENTYPE a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR d = __clc_ep_sub(a, b.hi); // scalar - scalar overload on the high part
+  d.lo -= b.lo; // b's low part is subtracted from the error term
+  return __clc_ep_normalize(d);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_GENTYPE a, __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(a, b.hi); // scalar fast_sub on the high part
+  d.lo -= b.lo; // b's low part is subtracted from the error term
+  return __clc_ep_normalize(d);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sub(__CLC_EP_PAIR a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR d = __clc_ep_sub(a.hi, b.hi); // difference of the high parts with its error
+  __CLC_EP_PAIR e = __clc_ep_sub(a.lo, b.lo); // difference of the low parts with its error
+  d.lo += e.hi; // merge the low-part difference first
+  d = __clc_ep_normalize(d);
+  d.lo += e.lo; // then the low-part error, after an intermediate renormalization
+  return __clc_ep_normalize(d);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_sub(__CLC_EP_PAIR a, __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(a.hi, b.hi); // scalar fast_sub on the high parts
+  d.lo = d.lo + a.lo - b.lo; // low parts are applied with plain rounding, no error tracking
+  return __clc_ep_normalize(d);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ldexp(__CLC_EP_PAIR a,
+                                                               int e) {
+  return __clc_ep_make_pair(__clc_ldexp(a.hi, e), __clc_ldexp(a.lo, e)); // same exponent shift applied to hi and lo; no renormalization
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_EP_PAIR a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_EP_PAIR p = __clc_ep_mul(a.hi, b); // a.hi * b with its error term
+  if (CLC_EP_USE_FMA) {
+    p.lo = __clc_fma(a.lo, b, p.lo); // add the cross term a.lo * b to the error
+  } else {
+    p.lo += a.lo * b; // same cross term without fma
+  }
+  return __clc_ep_normalize(p);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_mul_overflow(__CLC_EP_PAIR a, __CLC_GENTYPE b) {
+  __CLC_EP_PAIR p = __clc_ep_mul(a.hi, b); // a.hi * b with its error term
+  if (CLC_EP_USE_FMA) {
+    p.lo = __clc_fma(a.lo, b, p.lo); // add the cross term a.lo * b to the error
+  } else {
+    p.lo += a.lo * b; // same cross term without fma
+  }
+  return __clc_ep_normalize_overflow(p); // differs from __clc_ep_mul only in this final normalization
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_GENTYPE a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR p = __clc_ep_mul(a, b.hi); // a * b.hi with its error term
+  if (CLC_EP_USE_FMA) {
+    p.lo = __clc_fma(a, b.lo, p.lo); // add the cross term a * b.lo to the error
+  } else {
+    p.lo += a * b.lo; // same cross term without fma
+  }
+  return __clc_ep_normalize(p);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_mul_overflow(__CLC_GENTYPE a, __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR p = __clc_ep_mul(a, b.hi); // a * b.hi with its error term
+  if (CLC_EP_USE_FMA) {
+    p.lo = __clc_fma(a, b.lo, p.lo); // add the cross term a * b.lo to the error
+  } else {
+    p.lo += a * b.lo; // same cross term without fma
+  }
+  return __clc_ep_normalize_overflow(p); // differs from __clc_ep_mul only in this final normalization
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_EP_PAIR a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR p = __clc_ep_mul(a.hi, b.hi); // a.hi * b.hi with its error term
+  if (CLC_EP_USE_FMA) {
+    p.lo = __clc_fma(a.lo, b.hi, __clc_fma(a.hi, b.lo, p.lo)); // chain both cross terms onto the error; a.lo * b.lo is not included
+  } else {
+    p.lo += a.hi * b.lo + a.lo * b.hi; // same cross terms without fma
+  }
+  return __clc_ep_normalize(p);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_mul_overflow(__CLC_EP_PAIR a, __CLC_EP_PAIR b) {
+  __CLC_EP_PAIR p = __clc_ep_mul(a.hi, b.hi); // a.hi * b.hi with its error term
+  if (CLC_EP_USE_FMA) {
+    p.lo += __clc_fma(a.hi, b.lo, a.lo * b.hi); // cross terms are combined first, then added; ordering differs from __clc_ep_mul
+  } else {
+    p.lo += a.hi * b.lo + a.lo * b.hi; // same cross terms without fma
+  }
+  return __clc_ep_normalize_overflow(p);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_GENTYPE a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_GENTYPE r = __clc_recip_fast(b); // presumably an approximate 1/b - confirm against clc_recip_fast.h
+  __CLC_GENTYPE qhi = a * r; // first quotient estimate
+  __CLC_EP_PAIR p = __clc_ep_mul(qhi, b); // qhi * b as a pair
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(a, p.hi); // remainder against the high part of qhi * b
+  d.lo -= p.lo; // ... and against its low part
+  __CLC_GENTYPE qlo = (d.hi + d.lo) * r; // correction = remainder * r
+  return __clc_ep_fast_add(qhi, qlo);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_EP_PAIR a,
+                                                             __CLC_GENTYPE b) {
+  __CLC_GENTYPE r = __clc_recip_fast(b); // presumably an approximate 1/b - confirm against clc_recip_fast.h
+  __CLC_GENTYPE qhi = a.hi * r; // first quotient estimate from the high part
+  __CLC_EP_PAIR p = __clc_ep_mul(qhi, b); // qhi * b as a pair
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(a.hi, p.hi); // remainder of the high parts
+  d.lo = d.lo + a.lo - p.lo; // bring in a's low part and remove the product's low part
+  __CLC_GENTYPE qlo = (d.hi + d.lo) * r; // correction = remainder * r
+  return __clc_ep_fast_add(qhi, qlo);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_GENTYPE a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_GENTYPE r = __clc_recip_fast(b.hi); // presumably an approximate 1/b.hi - confirm against clc_recip_fast.h
+  __CLC_GENTYPE qhi = a * r; // first quotient estimate
+  __CLC_EP_PAIR p = __clc_ep_mul(qhi, b); // qhi * (b.hi, b.lo) as a pair
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(a, p.hi); // remainder against the high part of the product
+  d.lo -= p.lo; // ... and against its low part
+  __CLC_GENTYPE qlo = (d.hi + d.lo) * r; // correction = remainder * r
+  return __clc_ep_fast_add(qhi, qlo);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_div(__CLC_EP_PAIR a, __CLC_EP_PAIR b) {
+  __CLC_GENTYPE r = __clc_recip_fast(b.hi); // presumably an approximate 1/b.hi - confirm against clc_recip_fast.h
+  __CLC_GENTYPE qhi = a.hi * r; // first quotient estimate from the high parts
+  __CLC_EP_PAIR p = __clc_ep_mul(qhi, b); // qhi * (b.hi, b.lo) as a pair
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(a.hi, p.hi); // remainder of the high parts
+  d.lo = d.lo - p.lo + a.lo; // remove the product's low part, then bring in a's low part
+  __CLC_GENTYPE qlo = (d.hi + d.lo) * r; // single correction step, unlike the pair/pair __clc_ep_div
+  return __clc_ep_fast_add(qhi, qlo);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_div(__CLC_EP_PAIR a,
+                                                             __CLC_EP_PAIR b) {
+  __CLC_GENTYPE y = __clc_recip_fast(b.hi); // presumably an approximate 1/b.hi - confirm against clc_recip_fast.h
+  __CLC_GENTYPE qhi = a.hi * y; // first quotient term
+  __CLC_EP_PAIR r = __clc_ep_fast_sub(a, __clc_ep_mul(qhi, b)); // remainder after the first term
+  __CLC_GENTYPE qmi = r.hi * y; // second quotient term
+  r = __clc_ep_fast_sub(r, __clc_ep_mul(qmi, b)); // remainder after the second term
+  __CLC_GENTYPE qlo = r.hi * y; // third quotient term
+  __CLC_EP_PAIR q = __clc_ep_fast_add(qhi, qmi); // combine the first two terms as a pair
+  q.lo += qlo; // third term goes into the error part
+  return __clc_ep_normalize(q);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_recip(__CLC_GENTYPE b) {
+  __CLC_GENTYPE qhi = __clc_recip_fast(b); // presumably an approximate 1/b - confirm against clc_recip_fast.h
+  __CLC_EP_PAIR p = __clc_ep_mul(qhi, b); // qhi * b as a pair
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(__CLC_FP_LIT(1.0), p.hi); // remainder 1 - qhi * b, high part
+  d.lo -= p.lo; // ... and low part
+  __CLC_GENTYPE qlo = (d.hi + d.lo) * qhi; // correction = remainder * qhi
+  return __clc_ep_fast_add(qhi, qlo);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_fast_recip(__CLC_EP_PAIR b) {
+  __CLC_GENTYPE qhi = __clc_recip_fast(b.hi); // presumably an approximate 1/b.hi - confirm against clc_recip_fast.h
+  __CLC_EP_PAIR p = __clc_ep_mul(qhi, b); // qhi * (b.hi, b.lo) as a pair
+  __CLC_EP_PAIR d = __clc_ep_fast_sub(__CLC_FP_LIT(1.0), p.hi); // remainder 1 - qhi * b, high part
+  d.lo -= p.lo; // ... and low part
+  __CLC_GENTYPE qlo = (d.hi + d.lo) * qhi; // single correction step, unlike the pair __clc_ep_recip
+  return __clc_ep_fast_add(qhi, qlo);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
+__clc_ep_recip(__CLC_EP_PAIR b) {
+  __CLC_GENTYPE qhi = __clc_recip_fast(b.hi); // presumably an approximate 1/b.hi - confirm against clc_recip_fast.h
+  __CLC_EP_PAIR r = __clc_ep_fast_sub(__CLC_FP_LIT(1.0), __clc_ep_mul(qhi, b)); // remainder after the first term
+  __CLC_GENTYPE qmi = r.hi * qhi; // second quotient term
+  r = __clc_ep_fast_sub(r, __clc_ep_mul(qmi, b)); // remainder after the second term
+  __CLC_GENTYPE qlo = r.hi * qhi; // third quotient term
+  __CLC_EP_PAIR q = __clc_ep_fast_add(qhi, qmi); // combine the first two terms as a pair
+  q.lo += qlo; // third term goes into the error part
+  return __clc_ep_normalize(q);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqr(__CLC_EP_PAIR a) {
+  __CLC_EP_PAIR p = __clc_ep_sqr(a.hi); // a.hi^2 with its error term
+  if (CLC_EP_USE_FMA) {
+    p.lo = __clc_fma(a.hi, __CLC_FP_LIT(2.0) * a.lo, p.lo); // add the cross term 2 * a.hi * a.lo; a.lo^2 is not included
+  } else {
+    p.lo = p.lo + __CLC_FP_LIT(2.0) * a.lo * a.hi; // same cross term without fma
+  }
+  return __clc_ep_normalize(p);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_GENTYPE a) {
+  __CLC_GENTYPE shi = __clc_sqrt_fast(a); // presumably an approximate sqrt(a) - confirm against clc_sqrt_fast.h
+  __CLC_EP_PAIR e = __clc_ep_fast_sub(a, __clc_ep_sqr(shi)); // residual a - shi^2 as a pair
+  __CLC_GENTYPE slo = __clc_div_fast(e.hi, __CLC_FP_LIT(2.0) * shi); // correction = residual / (2 * shi)
+  return __clc_ep_fast_add(shi, // a == 0 gets a zero correction (presumably to avoid 0/0 in slo)
+                           a == __CLC_FP_LIT(0.0) ? __CLC_FP_LIT(0.0) : slo);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_sqrt(__CLC_EP_PAIR a) {
+  __CLC_GENTYPE shi = __clc_sqrt_fast(a.hi); // presumably an approximate sqrt(a.hi) - confirm against clc_sqrt_fast.h
+  __CLC_EP_PAIR e = __clc_ep_fast_sub(a, __clc_ep_sqr(shi)); // residual (a.hi, a.lo) - shi^2 as a pair
+  __CLC_GENTYPE slo = __clc_div_fast(e.hi, __CLC_FP_LIT(2.0) * shi); // correction = residual / (2 * shi)
+  return __clc_ep_fast_add(shi, // a.hi == 0 gets a zero correction (presumably to avoid 0/0 in slo)
+                           a.hi == __CLC_FP_LIT(0.0) ? __CLC_FP_LIT(0.0) : slo);
+}
+
+#undef CLC_EP_USE_FMA
+#endif

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to