https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77776
--- Comment #22 from Matthias Kretz (Vir) <mkretz at gcc dot gnu.org> ---
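I took your hypot3_scale and reduced both its latency and its throughput cost
(cycles per call). I don't think the sqrtmax/sqrtmin limits are correct: with
all three inputs near sqrtmax the sum of squares is about sqrtmax² * 3, which
overflows to infinity.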
TYPE                    Latency        Speedup      Throughput     Speedup
                        [cycles/call]  [per value]  [cycles/call]  [per value]
float,                  46.5           1            12.7           1
float, hypot3_scale     35.5           1.31         27             0.47
float, hypot3_mkretz    30.2           1.54         12             1.06
--------------------------------------------------------------------------------------
TYPE                    Latency        Speedup      Throughput     Speedup
                        [cycles/call]  [per value]  [cycles/call]  [per value]
double,                 59.6           1            60             1
double, hypot3_scale    51.2           1.16         48.8           1.23
double, hypot3_mkretz   40.1           1.49         35             1.71
/// Three-argument hypot: returns sqrt(x*x + y*y + z*z) while avoiding
/// premature overflow and underflow of the intermediate squares.
///
/// Special cases:
///  - any argument infinite       -> +infinity (even if another is NaN)
///  - otherwise any argument NaN  -> quiet NaN
///  - two arguments are zero      -> magnitude of the third argument
///
/// The range constants below assume a radix-2 type whose max_exponent is
/// even (true for the IEEE 754 binary formats); the example values in the
/// comments are for double.
///
/// @tparam T  floating-point type
/// @param x,y,z  the three components
/// @return the Euclidean norm of (x, y, z); never negative
template <typename T>
constexpr T
hypot(T x, T y, T z)
{
  using limits = std::numeric_limits<T>;

  // Exact power of two built by repeated doubling/halving. Unlike
  // exp2/log2/sqrt this is a constant expression in standard C++17.
  constexpr auto pow2 = [](int e) constexpr noexcept -> T
  {
    T r = 1;
    for (; e > 0; --e)
      r *= 2;
    for (; e < 0; ++e)
      r /= 2;
    return r;
  };

  constexpr int half_max_exp = limits::max_exponent / 2;    // 512

  // Largest magnitude handled without scaling. With every input <= big the
  // sum of the three squares is at most 3 * big^2 = 3 * 2^1022 < max, so it
  // cannot overflow (a bound of sqrt(max) would only protect a single square).
  constexpr T big = pow2(half_max_exp - 1);                  // 2^511

  // Smallest magnitude handled without scaling: tiny^2 >= min, so the
  // dominant square is a normal number.
  constexpr T tiny = pow2((limits::min_exponent - 1) / 2);   // 2^-511

  // Maps (big, max] into (1/4, 2^511): the scaled squares cannot overflow.
  constexpr T scale_down = pow2(-half_max_exp - 1);          // 2^-513

  // 1 / min. Maps [denorm_min, tiny) into [2^-52, 2^511): even the smallest
  // subnormal has a normal square after scaling, and nothing overflows.
  constexpr T scale_up = pow2(1 - limits::min_exponent);     // 2^1022

  constexpr T zero = 0;

  // sqrt((lo^2 + mid^2) + hi^2): the two smaller squares are summed first.
  auto root_sum_sq = [](T hi, T mid, T lo) noexcept -> T
  {
    T low = lo * lo + mid * mid;
    // Keep the optimizer from re-associating the sum (e.g. under
    // -ffast-math) where the compiler offers a barrier for that.
#ifdef __has_builtin
# if __has_builtin(__builtin_assoc_barrier)
    low = __builtin_assoc_barrier(low);
# endif
#endif
    return std::sqrt(low + hi * hi);
  };

  if (not (std::isnormal(x) && std::isnormal(y) && std::isnormal(z)))
    [[unlikely]]
    {
      // Infinity wins over NaN.
      if (std::isinf(x) | std::isinf(y) | std::isinf(z))
        return limits::infinity();
      else if (std::isnan(x) | std::isnan(y) | std::isnan(z))
        return limits::quiet_NaN();

      // With two zero components the result is exactly the magnitude of
      // the remaining one. std::abs is required here: these returns happen
      // before the general sign stripping below.
      if (x == zero && y == zero)
        return std::abs(z);
      if (x == zero && z == zero)
        return std::abs(y);
      if (y == zero && z == zero)
        return std::abs(x);
      // Otherwise (zero/subnormal mixed with other values) fall through.
    }

  x = std::abs(x);
  y = std::abs(y);
  z = std::abs(z);

  // a is the largest magnitude; b and c are the other two (in no
  // particular order relative to each other).
  const T a = std::max(std::max(x, y), z);
  const T b = std::min(std::max(x, y), z);
  const T c = std::min(x, y);

  if (a >= tiny && a <= big) [[likely]]
    return root_sum_sq(a, b, c);

  // Out of the safe range: scale by an exact power of two, compute, and
  // undo the scaling on the result.
  const T scale = a > big ? scale_down : scale_up;
  return root_sum_sq(a * scale, b * scale, c * scale) / scale;
}