[PATCHv8] libstdc++: Add generate_canonical impl (P0952, LWG2524) [PR119739]

Nathan Myers Fri, 15 Aug 2025 00:19:11 -0700

Changes in v8:
 * Apply LWG 2524 resolution to C++11..C++23 implementations, too.
 * In those, eliminate std::log called twice per entry, and do all
  setup calculations at compile time.
 * "if constexpr" the C++26 version to call the slightly-faster
  older version when conditions permit: actually almost always.


Change in v7:
 * Fix local consteval floor() meant for Clang.

Changes in v6:
 * Implement also for Clang, using local consteval substitutes for its
  non-constexpr __builtin_pow and __builtin_floor.
 * Add a test for long double.

Changes in v5:
 * Static-assert movable RNG object correctly.
 * Add a more comprehensive test gencanon.cc

Changes in v4:
 * Static-assert arg is floating-point, coercible from bigger unsigned.
 * Static-assert arg satisfies uniform_random_bit_generator, movable.
 * Include uniform_int_dist.h for concept uniform_random_bit_generator
 * Coerce floating consts from unsigned literals, matching other usage.
 * Remove redundant type decorations static and -> .
 * Use __builtin_floor for broader applicability than std::floor.
 * Use __builtin_pow in place of immediate lambda.
 * Rename, rearrange local objects for clarity.
 * Add redundant "if constexpr (k > 1)" for clarity.
 * #if-guard new impl against Clang, which lacks constexpr floor() etc.

Changes in v3:
 * Implement exactly as specified in the WP, for C++26 and up.

Implement P0952R2 "A new specification for std::generate_canonical".

It has us start over if the naïve generation of a value in 0..1
comes up exactly equal to 1, which occurs too rarely for the change
to measurably affect performance. The algorithm in 26 more general,
supporting floating point types other than radix-2.

The fix is extended to all of C++11/14/17/20/23/26, though only
C++26 gets support for non-radix-2 floats. (Yay z-series?)
When conditions permit, the 26 version calls the old, slightly
faster implementation.

This patch adds a new, exact, thorough test for statistical
properties, on all standard releases. It adds a new static_assert
on the RNG requirements for C++26.

A test for the case addressed in P0952 already appears in 64351.cc.
This change amounts to a different resolution for PR64351 and LWG 2524.

libstdc++-v3/ChangeLog:
        PR libstdc++/119739
        * include/bits/random.tcc: Add generate_canonical impl for C++26.
        * include/std/random: Include uniform_int_dist.h for concept.
        * 
testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc:
        Adapt test for both pre- and post- C++26.
        * 
testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc:
        Test for float and double from 32-bit RNG, including 1.0 do-over.
---
 libstdc++-v3/include/bits/random.tcc          | 151 ++++++++++++++----
 libstdc++-v3/include/std/random               |   1 +
 .../operators/64351.cc                        |  25 +--
 .../operators/gencanon.cc                     | 107 +++++++++++++
 4 files changed, 234 insertions(+), 50 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc

diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index 53ccacb2e38..6a9416d3bbf 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3349,43 +3349,134 @@ namespace __detail
        }
     }
 
-  template<typename _RealType, size_t __bits,
-          typename _UniformRandomNumberGenerator>
+
+  // This one works only when float radix == 2 and the RNG returns [0..2^n),
+  // but is slightly faster than the P0952 version, and is C++11.
+
+  template<typename _RealType, size_t __bits, typename _URBG>
     _RealType
-    generate_canonical(_UniformRandomNumberGenerator& __urng)
+    __generate_canonical_base_2(_URBG& __urng)
     {
-      static_assert(std::is_floating_point<_RealType>::value,
-                   "template argument must be a floating point type");
-
-      const size_t __b
-       = std::min(static_cast<size_t>(std::numeric_limits<_RealType>::digits),
-                   __bits);
-      const long double __r = static_cast<long double>(__urng.max())
-                           - static_cast<long double>(__urng.min()) + 1.0L;
-      const size_t __log2r = std::log(__r) / std::log(2.0L);
-      const size_t __m = std::max<size_t>(1UL,
-                                         (__b + __log2r - 1UL) / __log2r);
+      const size_t __b_max = std::numeric_limits<_RealType>::digits;
+      const size_t __b = __bits < __b_max ? __bits : __b_max;
+      const unsigned long long __range = _URBG::max() - _URBG::min();
+      const size_t __ullsize = std::numeric_limits<unsigned long long>::digits;
+      const size_t __log2r = __ullsize - __builtin_clzll(__range);
+      const _RealType __r = (_RealType)(__range) + 1.0L;
+      const int __m_min = (__b + __log2r - 1UL) / __log2r;
+      const int __m = __m_min < 1 ? 1 : __m_min;
+
       _RealType __ret;
-      _RealType __sum = _RealType(0);
-      _RealType __tmp = _RealType(1);
-      for (size_t __k = __m; __k != 0; --__k)
-       {
-         __sum += _RealType(__urng() - __urng.min()) * __tmp;
-         __tmp *= __r;
-       }
-      __ret = __sum / __tmp;
-      if (__builtin_expect(__ret >= _RealType(1), 0))
+      do
        {
-#if _GLIBCXX_USE_C99_MATH_FUNCS
-         __ret = std::nextafter(_RealType(1), _RealType(0));
-#else
-         __ret = _RealType(1)
-           - std::numeric_limits<_RealType>::epsilon() / _RealType(2);
-#endif
-       }
+         _RealType __tmp = _RealType(__r);
+         _RealType __sum = _RealType(__urng() - __urng.min());
+         for (int __k = 1; __k < __m; ++__k, __tmp *= __r)
+            __sum += _RealType(__urng() - __urng.min()) * __tmp;
+         __ret = __sum / __tmp;
+       } while (__builtin_expect(__ret >= _RealType(1), 0));
       return __ret;
     }
 
+#if __cplusplus < 202400
+
+  template<typename _RealType, size_t __bits,
+          typename _UniformRandomNumberGenerator>
+    _RealType
+    __generate_canonical_base_2(_UniformRandomNumberGenerator& __urng)
+    {
+      static_assert(std::is_floating_point<_RealType>::value,
+       "template argument must be a floating point type");
+      static_assert(std::numeric_limits<_RealT>::radix == 2,
+       "floating point type must be radix 2";
+
+      return __generate_canonical_base_2<_RealType, __bits>(__urng);
+    }
+
+
+#else // C++26, conform to P0952.
+
+# ifndef __clang__
+  template <typename _Tp>
+    consteval _Tp
+    __gen_con_pow(_Tp __base, size_t __exp)
+      { return __builtin_pow(__base, __exp); }
+  template <typename _Tp>
+    consteval _Tp
+    __gen_con_floor(_Tp __x)
+      { return __builtin_floor(__x); }
+# else // Clang lacks constexpr pow and floor.
+  template <typename _Tp>
+    consteval _Tp
+    __gen_con_pow(_Tp __base, size_t __exponent)
+    {
+      _Tp __prod = __base;
+      while (--__exponent != 0) __prod *= __base;
+      return __prod;
+    }
+  template <typename _Tp>
+    consteval _Tp
+    __gen_con_floor(_Tp __x)
+      {
+       unsigned long long __tmp = __x;
+       return __tmp - (__tmp > __x);
+      }
+# endif // __clang__
+
+  template<typename _RealT, size_t __digits, typename _URBG>
+    _RealT
+    generate_canonical(_URBG& __urng)
+    {
+      static_assert(requires {
+       [](uniform_random_bit_generator auto) {}(
+           static_cast<std::remove_cv_t<_URBG>&&>(__urng)); },
+       "argument must meet uniform_random_bit_generator requirements");
+      static_assert(
+       std::is_floating_point_v<_RealT> && requires { _RealT(_URBG::max()); },
+       "template argument must be floating point, coercible from unsigned");
+
+      const size_t  __r = std::numeric_limits<_RealT>::radix;
+
+      const auto __range = _URBG::max() - _URBG::min();
+      if constexpr (__r == 2 && ((__range + 1) & __range == 0) 
+       return __generate_canonical_base_2<_RealType, __bits>(__urng);
+      else
+      {
+       // Names below are chosen to match the description in the Standard.
+       const size_t  __r = std::numeric_limits<_RealT>::radix;
+       const auto  __sample = [](auto __high) constexpr
+         { return _RealT(__high - _URBG::min()); };
+       constexpr _RealT  __R = _RealT(1) + __sample(_URBG::max());
+       const size_t  __d_max = std::numeric_limits<_RealT>::digits;
+       const size_t  __d = __digits < __d_max ? __digits : __d_max;
+       constexpr _RealT  __rd = __gen_con_pow(_RealT(__r), __d);
+       const unsigned  __k = [](_RealT __R, _RealT __rd) consteval
+         {
+           unsigned __i = 1;
+           for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
+             ++__i;
+           return __i;
+         } (__R, __rd);
+       constexpr _RealT  __Rk = __gen_con_pow(__R, __k);
+       constexpr _RealT  __x = __gen_con_floor(__Rk / __rd);
+       static_assert((__x <= __Rk / __rd) && (__x + 1 > __Rk / __rd));
+       constexpr _RealT  __xrd = __x * __rd;
+
+       _RealT __sum;
+       do
+         {
+           _RealT __Ri = _RealT(1);
+           __sum = __sample(__urng());
+           for (int __i = 1; __i != __k; ++__i)
+             __Ri *= __R, __sum += __sample(__urng()) * __Ri;
+         } while (__builtin_expect( __sum >= __xrd, false ));
+       const _RealT __ret = __builtin_floor(__sum / __x) / __rd;
+       return __ret;
+      }
+    }
+
+#endif // C++26
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
 
diff --git a/libstdc++-v3/include/std/random b/libstdc++-v3/include/std/random
index 0e058a04bd9..86d4f0a8380 100644
--- a/libstdc++-v3/include/std/random
+++ b/libstdc++-v3/include/std/random
@@ -49,6 +49,7 @@
 #include <type_traits>
 #include <bits/random.h>
 #include <bits/opt_random.h>
+#include <bits/uniform_int_dist.h>
 #include <bits/random.tcc>
 
 #endif // C++11
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
index 3c0cb8bbd49..a5c8302f7b3 100644
--- 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
@@ -21,21 +21,6 @@
 #include <random>
 #include <testsuite_hooks.h>
 
-// libstdc++/64351
-void
-test01()
-{
-  std::mt19937 rng(8890);
-  std::uniform_real_distribution<float> dist;
-
-  rng.discard(30e6);
-  for (long i = 0; i < 10e6; ++i)
-    {
-      auto n = dist(rng);
-      VERIFY( n != 1.f );
-    }
-}
-
 // libstdc++/63176
 void
 test02()
@@ -45,21 +30,21 @@ test02()
   rng.seed(sequence);
   rng.discard(12 * 629143);
   std::mt19937 rng2{rng};
+  int mismatch = 0;
   for (int i = 0; i < 20; ++i)
   {
-    float n =
-      std::generate_canonical<float, std::numeric_limits<float>::digits>(rng);
+    float n = std::generate_canonical<float, 100>(rng);
     VERIFY( n != 1.f );
 
-    // PR libstdc++/80137
     rng2.discard(1);
-    VERIFY( rng == rng2 );
   }
+  // PR libstdc++/80137
+  rng2.discard(1);  // account for a 1.0 generated and discarded.
+  VERIFY( rng == rng2 );
 }
 
 int
 main()
 {
-  test01();
   test02();
 }
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
new file mode 100644
index 00000000000..ff2b1ba10dc
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
@@ -0,0 +1,107 @@
+// { dg-do run { target { c++11 && { ! simulator } } } }
+
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <cmath>
+#include <testsuite_hooks.h>
+
+// Verify P0952R9 implementation requiring a second round-trip
+// if first yields exactly 1. In this test, the RNG delivering
+// 32 bits per call is seeded such that this occurs once on the
+// sixth iteration for float, and not at all for double.
+// However, each double iteration requires two calls to the RNG.
+
+template <typename T, typename RNG>
+void
+test01(RNG& rng, RNG& rng2, int& deviation, int& max, int& rms, int& zero, 
int& one, int& skips)
+{
+  const auto size = 1000000, buckets = 100;
+  std::array<int, buckets> histo{};
+  for (auto i = 0; i != size; ++i) {
+    T sample = std::generate_canonical<T, 100>(rng);
+    VERIFY(sample >= 0.0);
+    VERIFY(sample < 1.0);  // libstdc++/64351
+    if (sample == 0.0) {
+      ++zero;
+    }
+    if (sample == 1.0) {
+      ++one;
+    }
+    auto bucket = static_cast<int>(std::floor(sample * buckets));
+    ++histo[bucket];
+    rng2.discard(1);
+    if (rng != rng2) {
+      ++skips;
+      rng2.discard(1);
+      VERIFY(rng == rng2);
+    }
+  }
+  int devsquare = 0;
+  for (int i = 0; i < buckets; ++i) {
+    const auto expected = size / buckets;
+    auto count = histo[i];
+    auto diff = count - expected;
+    if (diff < 0) diff = -diff;
+    deviation += diff;
+    devsquare += diff * diff;
+    if (diff > max) max = diff;
+  }
+  rms = std::sqrt(devsquare);
+}
+
+int
+main()
+{
+  std::mt19937 rng(8890);
+  std::seed_seq sequence{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  rng.seed(sequence);
+  rng.discard(12 * 629143);
+
+  { // float
+    int deviation{}, max{}, rms{}, zero{}, one{}, skips{};
+    auto rng2{rng};
+    auto rng3{rng};
+    test01<float>(rng2, rng3, deviation, max, rms, zero, one, skips);
+
+    if (std::numeric_limits<float>::is_iec559) {
+      VERIFY(deviation == 7032);
+      VERIFY(max == 276);
+      VERIFY(rms == 906);
+      VERIFY(zero == 0);
+      VERIFY(one == 0);
+    }
+    VERIFY(skips == 1);
+  }
+  { // double
+    int deviation{}, max{}, rms{}, zero{}, one{}, skips{};
+    auto rng2{rng};
+    auto rng3{rng};
+    test01<double>(rng2, rng3, deviation, max, rms, zero, one, skips);
+
+    if (std::numeric_limits<double>::is_iec559) {
+      VERIFY(deviation == 7650);
+      VERIFY(max == 259);
+      VERIFY(rms == 975);
+      VERIFY(zero == 0);
+      VERIFY(one == 0);
+    }
+    VERIFY(skips == 1000000);
+  }
+  { // long double, same answers as double
+    int deviation{}, max{}, rms{}, zero{}, one{}, skips{};
+    auto rng2{rng};
+    auto rng3{rng};
+    test01<long double>(rng2, rng3, deviation, max, rms, zero, one, skips);
+
+    if (std::numeric_limits<double>::is_iec559) {
+      VERIFY(deviation == 7650);
+      VERIFY(max == 259);
+      VERIFY(rms == 975);
+      VERIFY(zero == 0);
+      VERIFY(one == 0);
+    }
+    VERIFY(skips == 1000000);
+  }
+  return 0;
+}
-- 
2.50.0

[PATCHv8] libstdc++: Add generate_canonical impl (P0952, LWG2524) [PR119739]

Reply via email to