https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106484
/*
 * --- Comment #5 from rsaxvc at gmail dot com ---
 * Related ticket requested with Clang:
 * https://github.com/llvm/llvm-project/issues/63731
 *
 * latest umulh() function is a little shorter:
 *
 * NOTE(review): the accumulation step has been corrected relative to the
 * version originally posted. The posted code added both 64-bit cross
 * products into a single uint64_t, which wraps and silently drops a carry
 * for large operands (e.g. umulh(UINT64_MAX, UINT64_MAX)).
 */

/*
 * umulh - return the upper 64 bits of the 128-bit product a * b.
 *
 * Both operands are treated as unsigned. Only 32x32->64 multiplies and
 * 64-bit additions are used, so no 128-bit integer type is required.
 */
static uint64_t umulh(uint64_t a, uint64_t b)
{
    const uint32_t a_lo = (uint32_t)a;
    const uint32_t a_hi = (uint32_t)(a >> 32);
    const uint32_t b_lo = (uint32_t)b;
    const uint32_t b_hi = (uint32_t)(b >> 32);

    /*
     * FOIL method of multiplication.
     * See https://en.wikipedia.org/wiki/FOIL_method, but instead of
     * binomials with constants a,b,c,d and variables x,y:
     *     (ax + b) * (cy + d)
     * consider it with variables a,b,c,d and constants x = y = 1 << 32.
     * Each partial product is a single 32x32->64 multiply (one UMULL, or
     * UMLAL when merged with an accumulate, per the original author).
     */
    const uint64_t acc0 = (uint64_t)a_lo * b_lo; /* weight 2^0  */
    const uint64_t acc1 = (uint64_t)a_hi * b_lo; /* weight 2^32 */
    const uint64_t acc2 = (uint64_t)a_lo * b_hi; /* weight 2^32 */
    const uint64_t acc3 = (uint64_t)a_hi * b_hi; /* weight 2^64 */

    /*
     * Sum the 2^32 column using only the low halves of the cross products
     * plus the carry out of acc0. The total is at most 3 * (2^32 - 1), so
     * it cannot overflow 64 bits; its upper half is the carry into 2^64.
     */
    const uint64_t mid = (acc0 >> 32) + (uint32_t)acc1 + (uint32_t)acc2;

    /*
     * Everything with weight >= 2^64. This is exactly the high word of the
     * true product, which is < 2^64, so the sum cannot overflow.
     */
    return acc3 + (acc1 >> 32) + (acc2 >> 32) + (mid >> 32);
}