https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108787
Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |segher at gcc dot gnu.org

--- Comment #4 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
long random (void);

__attribute__((noipa)) unsigned __int128
foo (unsigned long long x, unsigned long long y, unsigned __int128 z)
{
  return (unsigned __int128) x * y + z;
}

__attribute__((noipa)) __int128
bar (long long x, long long y, __int128 z)
{
  return (__int128) x * y + z;
}

unsigned long long
baz (void)
{
  return (random () & 0x7fffffff) + ((random () & 0x7fffffffLL) << 31)
         + ((random () & 0x3ULL) << 62);
}

unsigned __int128
qux (void)
{
  return ((unsigned __int128) baz () << 64) + baz ();
}

int
main ()
{
  for (int i = 0; i < 10000000; ++i)
    {
      volatile unsigned __int128 x, y;
      unsigned __int128 z;
      x = baz ();
      y = baz ();
      z = qux ();
      if (foo (x, y, z) != (x * y) + z)
        __builtin_printf ("U 0x%016llx * 0x%016llx + 0x%016llx%016llx\n",
                          (unsigned long long) x, (unsigned long long) y,
                          (unsigned long long) (z >> 64), (unsigned long long) z);
    }
  for (int i = 0; i < 10000000; ++i)
    {
      volatile unsigned __int128 x, y;
      unsigned __int128 z;
      x = (long long) baz ();
      y = (long long) baz ();
      z = qux ();
      if (bar (x, y, z) != (x * y) + z)
        __builtin_printf ("S 0x%016llx * 0x%016llx + 0x%016llx%016llx\n",
                          (unsigned long long) x, (unsigned long long) y,
                          (unsigned long long) (z >> 64), (unsigned long long) z);
    }
  return 0;
}

shows that while it is correct for umaddditi4, it is not correct for maddditi4.
Example of bar arguments which result in a different result:
0xffffffffffffffff834a97f995de5fd5 * 0xffffffffffffffff878d5777da196ad2 + 0x630036472f469716e5be2424d91183d8
which computes 0x9dad19ebe2fba1e2351c16459af75292 but should compute
0x9dad19ebe2fba1e3351c16459af75292 instead.
In fact, the incorrect signed results are exactly all those where z has bit 0x8000000000000000ULL set, and the result is ((unsigned __int128) 1) << 64 smaller than it should be in that case. Segher, is it worth adding something more complicated for the maddditi4 case, or shall we just drop maddditi4 and only support umaddditi4?