Am Do., 11. März 2021 um 18:17 Uhr schrieb Patrick Palka via Libstdc++ <libstd...@gcc.gnu.org>: > > This implements a minimal integer class type that emulates 128-bit > unsigned arithmetic using a pair of 64-bit integers, which the > floating-point std::to_chars implementation then uses as a drop-in > replacement for unsigned __int128 on targets that lack the latter. > This allows us to fully support formatting of large long double types > on targets that lack __int128. > > Since Ryu performs 128-bit division/modulus only by 2, 5 and 10, the > integer class type supports only these divisors rather than supporting > general division/modulus. > > Tested on x86, x86_64, ppc64le, ppc64be and aarch64, with and without > performing the equivalent of -U__SIZEOF_INT128__ in floating_to_chars.cc > (so that we also test using the class type on targets when __int128 is > available). > > libstdc++-v3/ChangeLog: > > * src/c++17/floating_to_chars.cc: Simplify the file as if > __SIZEOF_INT128__ is always defined. > [!defined __SIZEOF_INT128__]: Include "uint128_t.h". Define > a to_chars overload for the uint128_t class type. > * src/c++17/uint128_t.h: New file. > * testsuite/20_util/to_chars/long_double.cc: No longer expect an > execution FAIL on targets that have a large long double type > but lack __int128. 
> --- > libstdc++-v3/src/c++17/floating_to_chars.cc | 58 ++-- > libstdc++-v3/src/c++17/uint128_t.h | 297 ++++++++++++++++++ > .../testsuite/20_util/to_chars/long_double.cc | 1 - > 3 files changed, 332 insertions(+), 24 deletions(-) > create mode 100644 libstdc++-v3/src/c++17/uint128_t.h > > diff --git a/libstdc++-v3/src/c++17/floating_to_chars.cc > b/libstdc++-v3/src/c++17/floating_to_chars.cc > index da3fbaa1ed1..86f4401e134 100644 > --- a/libstdc++-v3/src/c++17/floating_to_chars.cc > +++ b/libstdc++-v3/src/c++17/floating_to_chars.cc > @@ -64,25 +64,19 @@ extern "C" int __sprintfieee128(char*, const char*, ...); > > #if __LDBL_MANT_DIG__ == __DBL_MANT_DIG__ > # define LONG_DOUBLE_KIND LDK_BINARY64 > -#elif defined(__SIZEOF_INT128__) > -// The Ryu routines need a 128-bit integer type in order to do shortest > -// formatting of types larger than 64-bit double, so without __int128 we > can't > -// support any large long double format. This is the case for e.g. i386. > -# if __LDBL_MANT_DIG__ == 64 > +#elif __LDBL_MANT_DIG__ == 64 > # define LONG_DOUBLE_KIND LDK_FLOAT80 > -# elif __LDBL_MANT_DIG__ == 113 > -# define LONG_DOUBLE_KIND LDK_BINARY128 > -# elif __LDBL_MANT_DIG__ == 106 > -# define LONG_DOUBLE_KIND LDK_IBM128 > -# endif > -# if defined _GLIBCXX_USE_FLOAT128 && __FLT128_MANT_DIG__ == 113 > -// Define overloads of std::to_chars for __float128. > -# define FLOAT128_TO_CHARS 1 > -# endif > +#elif __LDBL_MANT_DIG__ == 113 > +# define LONG_DOUBLE_KIND LDK_BINARY128 > +#elif __LDBL_MANT_DIG__ == 106 > +# define LONG_DOUBLE_KIND LDK_IBM128 > +#else > +# define LONG_DOUBLE_KIND LDK_UNSUPPORTED > #endif > > -#if !defined(LONG_DOUBLE_KIND) > -# define LONG_DOUBLE_KIND LDK_UNSUPPORTED > +#if defined _GLIBCXX_USE_FLOAT128 && __FLT128_MANT_DIG__ == 113 > +// Define overloads of std::to_chars for __float128. > +# define FLOAT128_TO_CHARS 1 > #endif > > // For now we only support __float128 when it's the powerpc64 __ieee128 type. 
> @@ -100,6 +94,8 @@ namespace > { > #if defined __SIZEOF_INT128__ > using uint128_t = unsigned __int128; > +#else > +# include "uint128_t.h" > #endif > > namespace ryu > @@ -114,7 +110,6 @@ namespace > #include "ryu/d2fixed.c" > #include "ryu/f2s.c" > > -#ifdef __SIZEOF_INT128__ > namespace generic128 > { > // Put the generic Ryu bits in their own namespace to avoid name > conflicts. > @@ -129,7 +124,6 @@ namespace > int > to_chars(const floating_decimal_128 v, char* const result) > { return generic128::generic_to_chars(v, result); } > -#endif > } // namespace ryu > > // A traits class that contains pertinent information about the binary > @@ -407,10 +401,8 @@ namespace > return uint32_t{}; > else if constexpr (total_bits <= 64) > return uint64_t{}; > -#ifdef __SIZEOF_INT128__ > else if constexpr (total_bits <= 128) > return uint128_t{}; > -#endif > }; > using uint_t = decltype(get_uint_t()); > uint_t value_bits = 0; > @@ -503,7 +495,6 @@ namespace > return ryu::floating_to_fd32(value); > else if constexpr (std::is_same_v<T, double>) > return ryu::floating_to_fd64(value); > -#ifdef __SIZEOF_INT128__ > else if constexpr (std::is_same_v<T, long double> > || std::is_same_v<T, F128_type>) > { > @@ -519,7 +510,6 @@ namespace > mantissa_bits, exponent_bits, > !has_implicit_leading_bit); > } > -#endif > } > > // This subroutine returns true if the shortest scientific form fd is a > @@ -558,10 +548,32 @@ namespace > get_mantissa_length(const ryu::floating_decimal_64 fd) > { return ryu::decimalLength17(fd.mantissa); } > > -#ifdef __SIZEOF_INT128__ > int > get_mantissa_length(const ryu::floating_decimal_128 fd) > { return ryu::generic128::decimalLength(fd.mantissa); } > + > +#if !defined __SIZEOF_INT128__ > + // An implementation of base-10 std::to_chars for uint128_t on targets that > + // lack __int128. 
> + std::to_chars_result > + to_chars(char* first, char* last, uint128_t x) > + { > + const int len = ryu::generic128::decimalLength(x); > + if (last - first < len) > + return {last, std::errc::value_too_large}; > + if (x == 0) > + { > + *first++ = '0'; > + return {first, std::errc{}}; > + } > + for (int i = 0; i < len; ++i) > + { > + first[len - 1 - i] = '0' + static_cast<char>(x % 10); > + x /= 10; > + } > + __glibcxx_assert(x == 0); > + return {first + len, std::errc{}}; > + } > #endif > } // anon namespace > > diff --git a/libstdc++-v3/src/c++17/uint128_t.h > b/libstdc++-v3/src/c++17/uint128_t.h > new file mode 100644 > index 00000000000..90ebae2ffd2 > --- /dev/null > +++ b/libstdc++-v3/src/c++17/uint128_t.h > @@ -0,0 +1,297 @@ > +// A relatiely minimal unsigned 128-bit integer class type, used by the > +// floating-point std::to_chars implementation on targets that lack __int128. > + > +// Copyright (C) 2021 Free Software Foundation, Inc. > +// > +// This file is part of the GNU ISO C++ Library. This library is free > +// software; you can redistribute it and/or modify it under the > +// terms of the GNU General Public License as published by the > +// Free Software Foundation; either version 3, or (at your option) > +// any later version. > + > +// This library is distributed in the hope that it will be useful, > +// but WITHOUT ANY WARRANTY; without even the implied warranty of > +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +// GNU General Public License for more details. > + > +// Under Section 7 of GPL version 3, you are granted additional > +// permissions described in the GCC Runtime Library Exception, version > +// 3.1, as published by the Free Software Foundation. > + > +// You should have received a copy of the GNU General Public License and > +// a copy of the GCC Runtime Library Exception along with this program; > +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > +// <http://www.gnu.org/licenses/>. 
> + > +struct uint128_t > +{ > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ > + uint64_t lo, hi; > +#else > + uint64_t hi, lo; > +#endif > + > + uint128_t() = default; > + > + constexpr > + uint128_t(uint64_t lo, uint64_t hi = 0) > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ > + : lo(lo), hi(hi) > +#else > + : hi(hi), lo(lo) > +#endif > + { } > + > + constexpr explicit > + operator bool() const > + { return *this != 0; } > + > + template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>> > + constexpr explicit > + operator T() const > + { > + static_assert(sizeof(T) <= sizeof(uint64_t)); > + return static_cast<T>(lo); > + } > + > + friend constexpr uint128_t > + operator&(uint128_t x, const uint128_t y) > + { > + x.lo &= y.lo; > + x.hi &= y.hi; > + return x; > + } > + > + friend constexpr uint128_t > + operator|(uint128_t x, const uint128_t y) > + { > + x.lo |= y.lo; > + x.hi |= y.hi; > + return x; > + } > + > + friend constexpr uint128_t > + operator<<(uint128_t x, const uint128_t y) > + { > + __glibcxx_assert(y < 128); > + // TODO: Convince GCC to use shldq on x86 here. > + if (y.lo >= 64) > + { > + x.hi = x.lo << (y.lo - 64); > + x.lo = 0; > + } > + else if (y.lo != 0) > + { > + x.hi <<= y.lo; > + x.hi |= x.lo >> (64 - y.lo); > + x.lo <<= y.lo; > + } > + return x; > + } > + > + friend constexpr uint128_t > + operator>>(uint128_t x, const uint128_t y) > + { > + __glibcxx_assert(y < 128); > + // TODO: Convince GCC to use shrdq on x86 here. 
> + if (y.lo >= 64) > + { > + x.lo = x.hi >> (y.lo - 64); > + x.hi = 0; > + } > + else if (y.lo != 0) > + { > + x.lo >>= y.lo; > + x.lo |= x.hi << (64 - y.lo); > + x.hi >>= y.lo; > + } > + return x; > + } > + > + constexpr uint128_t > + operator~() const > + { return {~lo, ~hi}; } > + > + constexpr uint128_t > + operator-() const > + { return operator~() + 1; } > + > + friend constexpr uint128_t > + operator+(uint128_t x, const uint128_t y) > + { > + x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo); > + x.hi += y.hi; > + return x; > + } > + > + friend constexpr uint128_t > + operator-(uint128_t x, const uint128_t y) > + { > + x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo); > + x.hi -= y.hi; > + return x; > + } > + > + static constexpr uint128_t > + umul64_64_128(const uint64_t x, const uint64_t y) > + { > + const uint64_t xl = x & 0xffffffff; > + const uint64_t xh = x >> 32; > + const uint64_t yl = y & 0xffffffff; > + const uint64_t yh = y >> 32; > + const uint64_t ll = xl * yl; > + const uint64_t lh = xl * yh; > + const uint64_t hl = xh * yl; > + const uint64_t hh = xh * yh; > + const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff); > + const uint64_t l = (ll & 0xffffffff ) | (m << 32); > + const uint64_t h = (m >> 32) + (hl >> 32) + hh; > + return {l, h}; > + } > + > + friend constexpr uint128_t > + operator*(uint128_t x, const uint128_t y) > + { > + uint128_t z = umul64_64_128(x.lo, y.lo); > + z.hi += x.hi*y.lo + y.hi*x.lo; > + return z; > + } > + > + friend constexpr uint128_t > + operator/(const uint128_t x, const uint128_t y) > + { > + // Ryu performs 128-bit division only by 5 and 10, so that's what we > + // implement. The strategy here is to relate division of x with that of > + // x.hi and x.lo separately. > + __glibcxx_assert(y == 5 || y == 10); > + // The following implements division by 5 and 10. 
In either case, we > + // first compute division by 5: > + // x/5 = (x.hi*2^64 + x.lo)/5 > + // = (x.hi*(2^64-1) + x.hi + x.lo)/5 > + // = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is > exact > + // = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5) > + // We go a step further and replace the last adjustment term with a > + // lookup table, which we encode as a binary literal. This seems to > + // yield smaller code on x86 at least. > + constexpr auto cst = ~uint64_t(0) / 5; > + uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5}; > + constexpr auto lookup = 0b111100000u; > + q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1; > + if (y == 10) > + q >>= 1; > + return q; > + } > + > + friend constexpr uint128_t > + operator%(uint128_t x, const uint128_t y) > + { > + // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we > + // implement. The strategy here is to relate modulus of x with that of > + // x.hi and x.lo separately. > + if (y == 2) > + return x & 1; > + __glibcxx_assert(y == 5 || y == 10); > + // The following implements modulus by 5 and 10. In either case, > + // we first compute modulus by 5: > + // x (mod 5) = x.hi*2^64 + x.lo (mod 5) > + // = x.hi + x.lo (mod 5) since 2^64 ≡ 1 (mod 5) > + // So the straightforward implementation would be > + // ((x.hi % 5) + (x.lo % 5)) % 5 > + // But we go a step further and replace the outermost % with a > + // lookup table: > + // = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5) > + // which we encode as an octal literal. > + constexpr auto lookup = 0321043210u; > + auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7; > + if (y == 10) > + // x % 10 = (x % 5) if x / 5 is even > + // (x % 5) + 5 if x / 5 is odd > + // The compiler should be able to CSE the below computation of x/5 and > + // the above modulus operations with a nearby inlined computation of > x/10. 
> + r += ((x / 5).lo & 1) * 5; > + return r; > + } > + > + friend constexpr bool > + operator==(const uint128_t x, const uint128_t y) > + { return x.hi == y.hi && x.lo == y.lo; } > + > + friend constexpr bool > + operator<(const uint128_t x, const uint128_t y) > + { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); } > + > + friend constexpr auto > + __bit_width(const uint128_t x) > + { > + if (auto w = std::__bit_width(x.hi)) > + return w + 64; > + else > + return std::__bit_width(x.lo); > + } > + > + friend constexpr auto > + __countr_zero(const uint128_t x) > + { > + auto c = std::__countr_zero(x.lo); > + if (c == 64) > + return 64 + std::__countr_zero(x.hi); > + else > + return c; > + } > + > + constexpr uint128_t& > + operator--() > + { return *this -= 1; } > + > + constexpr uint128_t& > + operator++() > + { return *this += 1; } > + > + constexpr uint128_t& > + operator+=(const uint128_t y) > + { return *this = *this + y; } > + > + constexpr uint128_t& > + operator-=(const uint128_t y) > + { return *this = *this - y; } > + > + constexpr uint128_t& > + operator*=(const uint128_t y) > + { return *this = *this * y; } > + > + constexpr uint128_t& > + operator<<=(const uint128_t y) > + { return *this = *this << y; } > + > + constexpr uint128_t& > + operator>>=(const uint128_t y) > + { return *this = *this >> y; } > + > + constexpr uint128_t& > + operator|=(const uint128_t y) > + { return *this = *this | y; } > + > + constexpr uint128_t& > + operator&=(const uint128_t y) > + { return *this = *this & y; } > + > + constexpr uint128_t& > + operator%=(const uint128_t y) > + { return *this = *this % y; } > + > + constexpr uint128_t& > + operator/=(const uint128_t y) > + { return *this = *this / y; } > + > + friend constexpr bool > + operator!=(const uint128_t x, const uint128_t y) > + { return !(x == y); } > + > + friend constexpr bool > + operator>(const uint128_t x, const uint128_t y) > + { return y < x; } > + > + friend constexpr bool > + operator>=(const uint128_t 
x, const uint128_t y) > + { return !(x < y); } > +}; > diff --git a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc > b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc > index da847ae5401..5c1f7136f21 100644 > --- a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc > +++ b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc > @@ -18,7 +18,6 @@ > // <charconv> is supported in C++14 as a GNU extension, but this test uses > C++17 > // hexadecimal floating-point literals. > // { dg-do run { target c++17 } } > -// { dg-xfail-run-if "Ryu needs __int128" { large_long_double && { ! int128 > } } } > // { dg-require-effective-target ieee-floats }
It seems to me that basically all uint128_t operations should be unconditionally noexcept. Currently none of them are (the constexpr keyword is a red herring in this regard: it does not imply noexcept). Would it also be worth considering adding (conditional) support for operator<=>, together with the corresponding simplifications of the comparison operators? Thanks, - Daniel