libstdc++-v3/ChangeLog:

        * include/bits/simd_math.h: New file.

Signed-off-by: Matthias Kretz <[email protected]>
---
 libstdc++-v3/include/bits/simd_math.h | 993 ++++++++++++++++++++++++++
 1 file changed, 993 insertions(+)
 create mode 100644 libstdc++-v3/include/bits/simd_math.h


--
──────────────────────────────────────────────────────────────────────────
 Dr. Matthias Kretz                           https://mattkretz.github.io
 GSI Helmholtz Center for Heavy Ion Research               https://gsi.de
 std::simd
──────────────────────────────────────────────────────────────────────────
diff --git a/libstdc++-v3/include/bits/simd_math.h b/libstdc++-v3/include/bits/simd_math.h
new file mode 100644
index 00000000000..deee7fe4e2e
--- /dev/null
+++ b/libstdc++-v3/include/bits/simd_math.h
@@ -0,0 +1,993 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later WITH GCC-exception-3.1 */
+/* Copyright © 2025      GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
+ *                       Matthias Kretz <[email protected]>
+ */
+
+#ifndef _GLIBCXX_SIMD_MATH_H
+#define _GLIBCXX_SIMD_MATH_H 1
+
+#ifdef _GLIBCXX_SYSHDR
+#pragma GCC system_header
+#endif
+
+#if __cplusplus >= 202400L
+
+#include "simd_vec.h"
+
+// psabi warnings are bogus because the ABI of the internal types never leaks into user code
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpsabi"
+
+// [simd.math] ----------------------------------------------------------------
+namespace std::simd
+{
+  /** \internal
+   * Check whether \p __s matches "(( *__simd__". If it matches we assume it's a simd-clones
+   * attribute declaration as used in glibc's <math.h>.
+   */
+  consteval bool
+    __is_attribute_simd(const char* __s)
+  {
+    if (not __s) return false;
+    while (*__s != '(' and *__s != '\0') ++__s;
+    if (*__s == '\0') return false;
+    if (*++__s != '(') return false;
+    while (__s[1] == ' ') ++__s;
+    if (*++__s != '_') return false;
+    if (*++__s != '_') return false;
+    if (*++__s != 's') return false;
+    if (*++__s != 'i') return false;
+    if (*++__s != 'm') return false;
+    if (*++__s != 'd') return false;
+    if (*++__s != '_') return false;
+    if (*++__s != '_') return false;
+    return true;
+  }
+
+#define _GLIBCXX_SIMD_HAS_SIMD_CLONE(fn)                                                           \
+  __is_attribute_simd(_GLIBCXX_SIMD_TOSTRING(__SIMD_DECL(fn)))
+
+  // __FAST_MATH__ must be defined when including <math.h> in order to get calls to simd-clones of
+  // the math functions. Therefore, with __FAST_MATH__ we can inline everything, without it we need
+  // to call into the library, which can be compiled with fast-math.
+  // Also when building with __FAST_MATH__, we can ignore many corner cases (inf, NaN, negative
+  // zero) and call to a simpler (faster) implementation. With __FAST_MATH__ simd::sin calls
+  // simd::__fast_sin (unless inlined to a simd-clone), otherwise to simd::__sin (which internally
+  // might call __fast_sin).
+  //
+  // The _GLIBCXX_SIMD_HAS_SIMD_CLONE macro determines whether a simd-clone is declared for the
+  // fn(double) math function. We assume that a float simd-clone exists then, too. However, if for
+  // some reason the body of __fast_fn does not compile to something GCC wants to inline, then the
+  // gnu_inline attribute makes a call to the __fast_fn function in the library.
+  //
+  // FIXME: The __fast_<fn>[_2x] and __<fn>[_2x] functions currently use a _TargetTraits template
+  // parameter, which is way too coarse.
+
+#if _GLIBCXX_X86
+
+  // A pair<V0, V1> would always return via MEMORY, not SSE, according to the AMD64 psABI.
+  // However, in this case we *really* could use return via two registers: [xyz]mm0 and [xyz]mm1. It
+  // seems like we can do the trick via inline asm. It would be much better to have a different
+  // calling convention function attribute, though.
+  // (asm: it is important to fake a read-write to xmm0 in order to ensure no function gets called
+  // without saving xmm1 to the stack - unless xmm1 is an argument to that function call)
+#define _GLIBCXX_SIMD_MATH_2X_CALL(fn, arg)                                                        \
+  remove_cvref_t<decltype(arg._M_get_high()._M_get())> __hi;                                       \
+  auto __lo = fn(arg._M_get_low()._M_get(), arg._M_get_high()._M_get());                           \
+  asm("" : "={xmm1}"(__hi), "+{xmm0}"(__lo))
+
+#define _GLIBCXX_SIMD_MATH_2X_CALL2(fn, arg0, arg1)                                                \
+  remove_cvref_t<decltype(arg0._M_get_high()._M_get())> __hi;                                      \
+  auto __lo = fn(arg0._M_get_low()._M_get(), arg1._M_get_low()._M_get(),                           \
+                 arg0._M_get_high()._M_get(), arg1._M_get_high()._M_get());                        \
+  asm("" : "={xmm1}"(__hi), "+{xmm0}"(__lo))
+
+#define _GLIBCXX_SIMD_MATH_RET_TYPE _V0
+
+#else
+
+#define _GLIBCXX_SIMD_MATH_2X_CALL(fn, arg)                                                        \
+  const auto [__lo, __hi] = fn(arg._M_get_low()._M_get(), arg._M_get_high()._M_get())
+
+#define _GLIBCXX_SIMD_MATH_2X_CALL2(fn, arg0, arg1)                                                \
+  const auto [__lo, __hi] = fn(arg0._M_get_low()._M_get(), arg1._M_get_low()._M_get(),             \
+                               arg0._M_get_high()._M_get(), arg1._M_get_high()._M_get())
+
+#define _GLIBCXX_SIMD_MATH_RET_TYPE pair<_V0, _V1>
+
+#endif
+
+#define _GLIBCXX_SIMD_MATH_CALL(fn)                                                                \
+  template <typename _TV>                                                                          \
+    requires (_GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                                                    \
+    [[__gnu__::__gnu_inline__]]                                                                    \
+    inline _TV                                                                                     \
+    __fast_##fn(_TV __x)                                                                           \
+    {                                                                                              \
+      constexpr auto [...__is] = __iota<int[__width_of<_TV>]>;                                     \
+      return _TV{std::fn(__x[__is])...};                                                           \
+    }                                                                                              \
+                                                                                                   \
+  template <typename _Vp, _TargetTraits = {}>                                                      \
+    requires (not _GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                                                \
+    extern _Vp                                                                                     \
+    __fast_##fn(_Vp);                                                                              \
+                                                                                                   \
+  template <typename _V0, typename _V1, _TargetTraits = {}>                                        \
+    requires (not _GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                                                \
+    extern _GLIBCXX_SIMD_MATH_RET_TYPE                                                             \
+    __fast_2x_##fn(_V0, _V1);                                                                      \
+                                                                                                   \
+  template <typename _Vp, _TargetTraits = {}>                                                      \
+    _Vp                                                                                            \
+    __##fn(_Vp);                                                                                   \
+                                                                                                   \
+  template <typename _V0, typename _V1, _TargetTraits = {}>                                        \
+    _GLIBCXX_SIMD_MATH_RET_TYPE                                                                    \
+    __2x_##fn(_V0, _V1);                                                                           \
+                                                                                                   \
+  template<__math_floating_point _Vp, _TargetTraits _Traits = {}>                                  \
+    [[__gnu__::__always_inline__]]                                                                 \
+    constexpr __deduced_vec_t<_Vp>                                                                 \
+    fn(const _Vp& __x)                                                                             \
+    {                                                                                              \
+      if constexpr (not is_same_v<_Vp, __deduced_vec_t<_Vp>>)                                      \
+        return fn(static_cast<const __deduced_vec_t<_Vp>&>(__x));                                  \
+      else if (__builtin_is_constant_evaluated() or __x._M_is_constprop())                         \
+        return _Vp([&] [[__gnu__::__always_inline__]] (int __i) {                                  \
+                 return std::fn(__x[__i]);                                                         \
+               });                                                                                 \
+      else if constexpr (_Vp::size() == 1)                                                         \
+        return std::fn(__x[0]);                                                                    \
+      else if constexpr (_Traits.template _M_eval_as_f32<typename _Vp::value_type>())              \
+        return _Vp(fn(rebind_t<float, _Vp>(__x)));                                                 \
+      else if constexpr (_Vp::abi_type::_S_nreg == 1 and _Traits._M_fast_math())                   \
+        return __fast_##fn(__x._M_get());                                                          \
+      else if constexpr (_Vp::abi_type::_S_nreg == 1)                                              \
+        return __##fn(__x._M_get());                                                               \
+      else if constexpr (_Vp::abi_type::_S_nreg == 2 and _Traits._M_fast_math()                    \
+                           and not _GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                               \
+        {                                                                                          \
+          _GLIBCXX_SIMD_MATH_2X_CALL(__fast_2x_##fn, __x);                                         \
+          return _Vp::_S_init(__lo, __hi);                                                         \
+        }                                                                                          \
+      else if constexpr (_Vp::abi_type::_S_nreg == 2 and not _Traits._M_fast_math())               \
+        {                                                                                          \
+          _GLIBCXX_SIMD_MATH_2X_CALL(__2x_##fn, __x);                                              \
+          return _Vp::_S_init(__lo, __hi);                                                         \
+        }                                                                                          \
+      else                                                                                         \
+        return _Vp::_S_init(fn(__x._M_get_low()), fn(__x._M_get_high()));                          \
+    }
+
+#define _GLIBCXX_SIMD_MATH_CALL2(fn)                                                               \
+  template <typename _TV>                                                                          \
+    requires (_GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                                                    \
+    [[__gnu__::__gnu_inline__]]                                                                    \
+    inline _TV                                                                                     \
+    __fast_##fn(_TV __x0, _TV __x1)                                                                \
+    {                                                                                              \
+      constexpr auto [...__is] = __iota<int[__width_of<_TV>]>;                                     \
+      return _TV{std::fn(__x0[__is], __x1[__is])...};                                              \
+    }                                                                                              \
+                                                                                                   \
+  template <typename _TV, _TargetTraits = {}>                                                      \
+    requires (not _GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                                                \
+    extern _TV                                                                                     \
+    __fast_##fn(_TV, _TV);                                                                         \
+                                                                                                   \
+  template <typename _V0, typename _V1, _TargetTraits = {}>                                        \
+    requires (not _GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                                                \
+    extern _GLIBCXX_SIMD_MATH_RET_TYPE                                                             \
+    __fast_2x_##fn(_V0, _V0, _V1, _V1);                                                            \
+                                                                                                   \
+  template <typename _TV, _TargetTraits = {}>                                                      \
+    _TV                                                                                            \
+    __##fn(_TV, _TV);                                                                              \
+                                                                                                   \
+  template <typename _V0, typename _V1, _TargetTraits = {}>                                        \
+    _GLIBCXX_SIMD_MATH_RET_TYPE                                                                    \
+    __2x_##fn(_V0, _V0, _V1, _V1);                                                                 \
+                                                                                                   \
+  template <typename _V0, typename _V1, _TargetTraits _Traits = {}>                                \
+    [[__gnu__::__always_inline__]]                                                                 \
+    constexpr __math_common_simd_t<_V0, _V1>                                                       \
+    fn(const _V0& __x, const _V1& __y)                                                             \
+    {                                                                                              \
+      if constexpr (not is_same_v<_V0, __math_common_simd_t<_V0, _V1>>                             \
+                      or not is_same_v<_V1, __math_common_simd_t<_V0, _V1>>)                       \
+        return fn(static_cast<const __math_common_simd_t<_V0, _V1>&>(__x),                         \
+                  static_cast<const __math_common_simd_t<_V0, _V1>&>(__y));                        \
+      else if (__builtin_is_constant_evaluated()                                                   \
+                 or (__x._M_is_constprop() and __y._M_is_constprop()))                             \
+        return _V0([&] [[__gnu__::__always_inline__]] (int __i) {                                  \
+                 return std::fn(__x[__i], __y[__i]);                                               \
+               });                                                                                 \
+      else if constexpr (_V0::size() == 1)                                                         \
+        return std::fn(__x[0], __y[0]);                                                            \
+      else if constexpr (_Traits.template _M_eval_as_f32<typename _V0::value_type>())              \
+        return _V0(fn(rebind_t<float, _V0>(__x), rebind_t<float, _V0>(__y)));                      \
+      else if constexpr (_V0::abi_type::_S_nreg == 1 and _Traits._M_fast_math())                   \
+        return __fast_##fn(__x._M_get(), __y._M_get());                                            \
+      else if constexpr (_V0::abi_type::_S_nreg == 1)                                              \
+        return __##fn(__x._M_get(), __y._M_get());                                                 \
+      else if constexpr (_V0::abi_type::_S_nreg == 2 and _Traits._M_fast_math()                    \
+                           and not _GLIBCXX_SIMD_HAS_SIMD_CLONE(fn))                               \
+        {                                                                                          \
+          _GLIBCXX_SIMD_MATH_2X_CALL2(__fast_2x_##fn, __x, __y);                                   \
+          return _V0::_S_init(__lo, __hi);                                                         \
+        }                                                                                          \
+      else if constexpr (_V0::abi_type::_S_nreg == 2 and not _Traits._M_fast_math())               \
+        {                                                                                          \
+          _GLIBCXX_SIMD_MATH_2X_CALL2(__2x_##fn, __x, __y);                                        \
+          return _V0::_S_init(__lo, __hi);                                                         \
+        }                                                                                          \
+      else                                                                                         \
+        return _V0::_S_init(fn(__x._M_get_low(), __y._M_get_low()),                                \
+                            fn(__x._M_get_high(), __y._M_get_high()));                             \
+    }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<int, __deduced_vec_t<_Vp>>
+    ilogb(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    ldexp(const _Vp& __x, const rebind_t<int, __deduced_vec_t<_Vp>>& exp)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    scalbn(const _Vp& __x, const rebind_t<int, __deduced_vec_t<_Vp>>& n)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    scalbln(const _Vp& __x, const rebind_t<long int, __deduced_vec_t<_Vp>>& n)
+    { static_assert(false, "TODO"); }
+
+  template <signed_integral T, typename Abi>
+    [[__gnu__::__always_inline__]]
+    constexpr basic_vec<T, Abi>
+    abs(const basic_vec<T, Abi>& __x)
+    { return __x._M_abs(); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    abs(const _Vp& __x)
+    { return static_cast<const __deduced_vec_t<_Vp>&>(__x)._M_abs(); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    fabs(const _Vp& __x)
+    { return static_cast<const __deduced_vec_t<_Vp>&>(__x)._M_fabs(); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    ceil(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    floor(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    __deduced_vec_t<_Vp>
+    nearbyint(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    __deduced_vec_t<_Vp>
+    rint(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    rebind_t<long int, __deduced_vec_t<_Vp>>
+    lrint(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    rebind_t<long long int, __deduced_vec_t<_Vp>>
+    llrint(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    round(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<long int, __deduced_vec_t<_Vp>>
+    lround(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<long long int, __deduced_vec_t<_Vp>>
+    llround(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    fmod(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    trunc(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    remainder(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    copysign(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    nextafter(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    fdim(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    fmax(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    fmin(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1, typename _V2>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1, _V2>
+    fma(const _V0& __x, const _V1& __y, const _V2& __z)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<int, __deduced_vec_t<_Vp>>
+    fpclassify(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __deduced_vec_t<_Vp>::mask_type
+    isfinite(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __deduced_vec_t<_Vp>::mask_type
+    isinf(const _Vp& __x)
+    { return static_cast<const __deduced_vec_t<_Vp>&>(__x)._M_isinf(); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __deduced_vec_t<_Vp>::mask_type
+    isnan(const _Vp& __x)
+    { return static_cast<const __deduced_vec_t<_Vp>&>(__x)._M_isnan(); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __deduced_vec_t<_Vp>::mask_type
+    isnormal(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __deduced_vec_t<_Vp>::mask_type
+    signbit(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __math_common_simd_t<_V0, _V1>::mask_type
+    isgreater(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __math_common_simd_t<_V0, _V1>::mask_type
+    isgreaterequal(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __math_common_simd_t<_V0, _V1>::mask_type
+    isless(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __math_common_simd_t<_V0, _V1>::mask_type
+    islessequal(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __math_common_simd_t<_V0, _V1>::mask_type
+    islessgreater(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template <typename _V0, typename _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr typename __math_common_simd_t<_V0, _V1>::mask_type
+    isunordered(const _V0& __x, const _V1& __y)
+    {
+      using _Vp = __math_common_simd_t<_V0, _V1>;
+      if constexpr (__simd_integral<_V0> or is_integral_v<_V0>)
+        return __y._M_isnan();
+      else if constexpr (__simd_integral<_V1> or is_integral_v<_V1>)
+        return __x._M_isnan();
+      else
+        return static_cast<const _Vp&>(__x)._M_isunordered(static_cast<const _Vp&>(__y));
+    }
+
+  _GLIBCXX_SIMD_MATH_CALL(acos)
+  _GLIBCXX_SIMD_MATH_CALL(asin)
+  _GLIBCXX_SIMD_MATH_CALL(atan)
+  _GLIBCXX_SIMD_MATH_CALL2(atan2)
+  _GLIBCXX_SIMD_MATH_CALL(cos)
+  _GLIBCXX_SIMD_MATH_CALL(sin)
+  _GLIBCXX_SIMD_MATH_CALL(tan)
+  _GLIBCXX_SIMD_MATH_CALL(acosh)
+  _GLIBCXX_SIMD_MATH_CALL(asinh)
+  _GLIBCXX_SIMD_MATH_CALL(atanh)
+  _GLIBCXX_SIMD_MATH_CALL(cosh)
+  _GLIBCXX_SIMD_MATH_CALL(sinh)
+  _GLIBCXX_SIMD_MATH_CALL(tanh)
+
+  _GLIBCXX_SIMD_MATH_CALL(exp)
+  _GLIBCXX_SIMD_MATH_CALL(exp2)
+  _GLIBCXX_SIMD_MATH_CALL(expm1)
+
+  _GLIBCXX_SIMD_MATH_CALL(log)
+  _GLIBCXX_SIMD_MATH_CALL(log10)
+  _GLIBCXX_SIMD_MATH_CALL(log1p)
+  _GLIBCXX_SIMD_MATH_CALL(log2)
+  _GLIBCXX_SIMD_MATH_CALL(logb)
+
+  _GLIBCXX_SIMD_MATH_CALL(cbrt)
+
+  _GLIBCXX_SIMD_MATH_CALL2(hypot)
+
+  template<typename _V0, typename _V1, typename _V2, _TargetTraits _Traits = {}>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1, _V2>
+    hypot(const _V0& __x, const _V1& __y, const _V2& __z)
+    {
+      if constexpr (not is_same_v<_V0, __math_common_simd_t<_V0, _V1, _V2>>
+                      or not is_same_v<_V1, __math_common_simd_t<_V0, _V1, _V2>>
+                      or not is_same_v<_V2, __math_common_simd_t<_V0, _V1, _V2>>)
+        return hypot(static_cast<const __math_common_simd_t<_V0, _V1, _V2>&>(__x),
+                     static_cast<const __math_common_simd_t<_V0, _V1, _V2>&>(__y),
+                     static_cast<const __math_common_simd_t<_V0, _V1, _V2>&>(__z));
+      else if (__builtin_is_constant_evaluated()
+                 or (__x._M_is_constprop() and __y._M_is_constprop() and __z._M_is_constprop()))
+        return _V0([&] [[__gnu__::__always_inline__]] (int __i) {
+                 return std::hypot(__x[__i], __y[__i], __z[__i]);
+               });
+      else if constexpr (_V0::size() == 1)
+        return std::hypot(__x[0], __y[0], __z[0]);
+      else if constexpr (_Traits.template _M_eval_as_f32<typename _V0::value_type>())
+        {
+          using _Vf = rebind_t<float, _V0>;
+          return _V0(hypot(_Vf(__x), _Vf(__y), _Vf(__z)));
+        }
+      else if constexpr (_V0::abi_type::_S_nreg > 2)
+        return _V0::_S_init(hypot(__x._M_get_low(), __y._M_get_low()),
+                            hypot(__x._M_get_high(), __y._M_get_high()));
+      else
+        static_assert(false, "TODO");
+    }
+
+  _GLIBCXX_SIMD_MATH_CALL2(pow)
+  _GLIBCXX_SIMD_MATH_CALL(sqrt)
+  _GLIBCXX_SIMD_MATH_CALL(erf)
+  _GLIBCXX_SIMD_MATH_CALL(erfc)
+  _GLIBCXX_SIMD_MATH_CALL(lgamma)
+  _GLIBCXX_SIMD_MATH_CALL(tgamma)
+
+  template <__simd_floating_point _Vp, _TargetTraits = {}>
+    inline _Vp
+    __lerp(_Vp __a, _Vp __b, _Vp __t) noexcept
+    {
+      constexpr _Vp __zero = {};
+      constexpr _Vp __one(1);
+      using _Mp = typename _Vp::mask_type;
+
+      // TODO: benchmark which method of computing the mask is better
+#if 1
+      const _Mp __different_sign = (__a * __b <= __zero);
+#elif 0
+      const _Mp __different_sign
+        = (__a <= __zero and __b >= __zero) or (__a >= __zero and __b <= __zero);
+#else
+      using _IV = rebind_t<__integer_from<sizeof(typename _Vp::value_type)>, _Vp>;
+      const _Mp __different_sign
+        = ((bit_cast<_IV>(__a) ^ bit_cast<_IV>(__b)) < _IV()) or __a == __zero or __b == __zero;
+#endif
+
+      const _Vp __r = __t * __b + (__one - __t) * __a; // __r is also exact at __t=1
+      if (all_of(__different_sign))
+        return __r;
+      else
+        {
+          // Exact at __t=0, monotonic except near __t=1,
+          // bounded, determinate, and consistent:
+          const _Vp __x = __a + __t * (__b - __a);
+          return select(__different_sign or __t == __one, __r,
+                        select((__t > __one) == (__b > __a),
+                               select(__b < __x, __x, __b),
+                               select(__b > __x, __x, __b)));  // monotonic near __t=1
+        }
+    }
+
+  template<typename _V0, typename _V1, typename _V2, _TargetTraits _Traits = {}>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1, _V2>
+    lerp(const _V0& __a, const _V1& __b, const _V2& __t) noexcept
+    {
+      if constexpr (not is_same_v<_V0, __math_common_simd_t<_V0, _V1, _V2>>
+                      or not is_same_v<_V1, __math_common_simd_t<_V0, _V1, _V2>>
+                      or not is_same_v<_V2, __math_common_simd_t<_V0, _V1, _V2>>)
+        return lerp(static_cast<const __math_common_simd_t<_V0, _V1, _V2>&>(__a),
+                    static_cast<const __math_common_simd_t<_V0, _V1, _V2>&>(__b),
+                    static_cast<const __math_common_simd_t<_V0, _V1, _V2>&>(__t));
+      else if (__builtin_is_constant_evaluated()
+                 or (__a._M_is_constprop() and __b._M_is_constprop() and __t._M_is_constprop()))
+        return _V0([&] [[__gnu__::__always_inline__]] (int __i) {
+                 return std::lerp(__a[__i], __b[__i], __t[__i]);
+               });
+      else if constexpr (_V0::size() == 1)
+        return std::lerp(__a[0], __b[0], __t[0]);
+      else if constexpr (_Traits.template _M_eval_as_f32<typename _V0::value_type>())
+        { // Any operation on value_type converts to float anyway (and back). Thus, it's more
+          // efficient if we can reduce the number of conversions (and use a simpler formula).
+          //
+          // Sketch of a proof that ta+(1-t)a == a for all t (consistency):
+          //
+          // Large t, such that (1-t) == -t would be problematic, but _Float16 max is 65504
+          //   => no issue
+          // Small t, such that (1-t) is inexact would be problematic.
+          // - Consider _Float16 sub-min (2^-24) => 1-2^24, which is exact in binary32.
+          //   => For any number larger than 2^-24, (1-t) cannot be inexact either.
+          // - Consider negative sub-min (-2^-24) => 1+2^24, which is off by .5 ULP in binary32.
+          // - The ULP of the smallest normal negative number also is -2^-24, which leads to the
+          // same .5 ULP loss of precision for (1-t).
+          // This .5 ULP loss does not matter when computing ta+(1-t)a because ta is smaller by
+          // a factor of 2^24 and thus after rounding to binary16, no error is left.
+          using _Vf = rebind_t<float, _V0>;
+          const _Vf __th = _Vf(__t);
+          return _V0(__th * __b + (_Vf(1) - __th) * __a);
+        }
+      else if constexpr (_V0::abi_type::_S_nreg > 1)
+        return _V0::_S_init(lerp(__a._M_get_low(), __b._M_get_low(), __t._M_get_low()),
+                            lerp(__a._M_get_high(), __b._M_get_high(), __t._M_get_high()));
+      else
+        return __lerp(__a, __b, __t);
+    }
+
+  template<__math_floating_point _Vp>
+    __deduced_vec_t<_Vp>
+    assoc_laguerre(const rebind_t<unsigned, __deduced_vec_t<_Vp>>& __n,
+                   const rebind_t<unsigned, __deduced_vec_t<_Vp>>& __m, const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp>
+    __deduced_vec_t<_Vp>
+    assoc_legendre(const rebind_t<unsigned, __deduced_vec_t<_Vp>>& __l,
+                   const rebind_t<unsigned, __deduced_vec_t<_Vp>>& __m, const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    beta(const _V0& __x, const _V1& __y)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    comp_ellint_1(const _Vp& __k)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    comp_ellint_2(const _Vp& __k)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    comp_ellint_3(const _V0& __k, const _V1& __nu)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    cyl_bessel_i(const _V0& __nu, const _V1& __x)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    cyl_bessel_j(const _V0& __nu, const _V1& __x)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    cyl_bessel_k(const _V0& __nu, const _V1& __x)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    cyl_neumann(const _V0& __nu, const _V1& __x)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    ellint_1(const _V0& __k, const _V1& __phi)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    __math_common_simd_t<_V0, _V1>
+    ellint_2(const _V0& __k, const _V1& __phi)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1, class _V2>
+    __math_common_simd_t<_V0, _V1, _V2>
+    ellint_3(const _V0& __k, const _V1& __nu, const _V2& __phi)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    expint(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    hermite(const rebind_t<unsigned,
+                           __deduced_vec_t<_Vp>>& __n, const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    laguerre(const rebind_t<unsigned,
+                            __deduced_vec_t<_Vp>>& __n, const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    legendre(const rebind_t<unsigned,
+                            __deduced_vec_t<_Vp>>& __l, const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    riemann_zeta(const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    sph_bessel(const rebind_t<unsigned,
+                              __deduced_vec_t<_Vp>>& __n, const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp>
+    __deduced_vec_t<_Vp>
+    sph_legendre(const rebind_t<unsigned, __deduced_vec_t<_Vp>>& __l,
+                 const rebind_t<unsigned, __deduced_vec_t<_Vp>>& __m, const _Vp& __theta)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp> __deduced_vec_t<_Vp>
+    sph_neumann(const rebind_t<unsigned, __deduced_vec_t<_Vp>>& __n, const _Vp& __x)
+    { static_assert(false, "TODO"); }
+
+  template<__math_floating_point _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr __deduced_vec_t<_Vp>
+    frexp(const _Vp& __value, rebind_t<int, __deduced_vec_t<_Vp>>* __exp)
+    { static_assert(false, "TODO"); }
+
+  template<class _V0, class _V1>
+    [[__gnu__::__always_inline__]]
+    constexpr __math_common_simd_t<_V0, _V1>
+    remquo(const _V0& __x, const _V1& __y, rebind_t<int, __math_common_simd_t<_V0, _V1>>* __quo)
+    { static_assert(false, "TODO"); }
+
+  template<class T, class Abi>
+    [[__gnu__::__always_inline__]]
+    constexpr basic_vec<T, Abi>
+    modf(const type_identity_t<basic_vec<T, Abi>>& __value, basic_vec<T, Abi>* __iptr)
+    { static_assert(false, "TODO"); }
+}
+
+// clean up internal macros
+#undef _GLIBCXX_SIMD_HAS_SIMD_CLONE
+#undef _GLIBCXX_SIMD_FN_NAME
+#undef _GLIBCXX_SIMD_MATH_2X_CALL
+#undef _GLIBCXX_SIMD_MATH_2X_CALL2
+#undef _GLIBCXX_SIMD_MATH_RET_TYPE
+#undef _GLIBCXX_SIMD_MATH_CALL
+#undef _GLIBCXX_SIMD_MATH_CALL2
+
+namespace std
+{
+  using simd::acos;
+  using simd::asin;
+  using simd::atan;
+  using simd::atan2;
+  using simd::cos;
+  using simd::sin;
+  using simd::tan;
+  using simd::acosh;
+  using simd::asinh;
+  using simd::atanh;
+  using simd::cosh;
+  using simd::sinh;
+  using simd::tanh;
+  using simd::exp;
+  using simd::exp2;
+  using simd::expm1;
+  using simd::frexp;
+  using simd::ilogb;
+  using simd::ldexp;
+  using simd::log;
+  using simd::log10;
+  using simd::log1p;
+  using simd::log2;
+  using simd::logb;
+  using simd::modf;
+  using simd::scalbn;
+  using simd::scalbln;
+  using simd::cbrt;
+  using simd::abs;
+  using simd::abs;
+  using simd::fabs;
+  using simd::hypot;
+  using simd::pow;
+  using simd::sqrt;
+  using simd::erf;
+  using simd::erfc;
+  using simd::lgamma;
+  using simd::tgamma;
+  using simd::ceil;
+  using simd::floor;
+  using simd::nearbyint;
+  using simd::rint;
+  using simd::lrint;
+  using simd::llrint;
+  using simd::round;
+  using simd::lround;
+  using simd::llround;
+  using simd::trunc;
+  using simd::fmod;
+  using simd::remainder;
+  using simd::remquo;
+  using simd::copysign;
+  using simd::nextafter;
+  using simd::fdim;
+  using simd::fmax;
+  using simd::fmin;
+  using simd::fma;
+  using simd::lerp;
+  using simd::fpclassify;
+  using simd::isfinite;
+  using simd::isinf;
+  using simd::isnan;
+  using simd::isnormal;
+  using simd::signbit;
+  using simd::isgreater;
+  using simd::isgreaterequal;
+  using simd::isless;
+  using simd::islessequal;
+  using simd::islessgreater;
+  using simd::isunordered;
+  using simd::assoc_laguerre;
+  using simd::assoc_legendre;
+  using simd::beta;
+  using simd::comp_ellint_1;
+  using simd::comp_ellint_2;
+  using simd::comp_ellint_3;
+  using simd::cyl_bessel_i;
+  using simd::cyl_bessel_j;
+  using simd::cyl_bessel_k;
+  using simd::cyl_neumann;
+  using simd::ellint_1;
+  using simd::ellint_2;
+  using simd::ellint_3;
+  using simd::expint;
+  using simd::hermite;
+  using simd::laguerre;
+  using simd::legendre;
+  using simd::riemann_zeta;
+  using simd::sph_bessel;
+  using simd::sph_legendre;
+  using simd::sph_neumann;
+}
+
+// [simd.complex.math] --------------------------------------------------------
+namespace std::simd
+{
+  template <__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+    real(const _Vp& __x) noexcept
+    { return __x.real(); }
+
+  template <__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+    imag(const _Vp& __x) noexcept
+    { return __x.imag(); }
+
+  template <__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+    abs(const _Vp& __x) noexcept
+    { return __x._M_abs(); }
+
+  template <__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+    arg(const _Vp& __x) noexcept
+    { return __x._M_arg(); }
+
+  template <__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+    norm(const _Vp& __x) noexcept
+    { return __x._M_norm(); }
+
+  template <__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    conj(const _Vp& __x) noexcept
+    { return __x._M_conj(); }
+
+  template <__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    proj(const _Vp& __x) noexcept
+    { return __x._M_proj(); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    exp(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    log(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    log10(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    sqrt(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    sin(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    asin(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    cos(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    acos(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    tan(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    atan(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    sinh(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    asinh(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    cosh(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    acosh(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    tanh(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    atanh(const _Vp& __v)
+    { static_assert(false, "TODO"); }
+
+  template<__simd_floating_point _Vp>
+    rebind_t<complex<typename _Vp::value_type>, _Vp>
+    polar(const _Vp& __x, const _Vp& __y = {})
+    { static_assert(false, "TODO"); }
+
+  template<__simd_complex _Vp>
+    [[__gnu__::__always_inline__]]
+    constexpr _Vp
+    pow(const _Vp& __x, const _Vp& __y)
+    { static_assert(false, "TODO"); }
+}
+
+namespace std
+{
+  using simd::real;
+  using simd::imag;
+  using simd::arg;
+  using simd::norm;
+  using simd::conj;
+  using simd::proj;
+  using simd::polar;
+}
+
+#pragma GCC diagnostic pop
+#endif // C++26
+#endif // _GLIBCXX_SIMD_MATH_H

Reply via email to