https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108787
Jakub Jelinek <jakub at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |segher at gcc dot gnu.org
--- Comment #4 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
long random (void);
/* Unsigned widening multiply-add: X and Y are multiplied as 128-bit
   unsigned values and Z is added to the product (modulo 2^128).
   noipa keeps interprocedural optimization away from this function,
   so the compiler has to emit the computation here.  */
__attribute__((noipa)) unsigned __int128
foo (unsigned long long x, unsigned long long y, unsigned __int128 z)
{
  unsigned __int128 wide_x = x;
  unsigned __int128 product = wide_x * y;

  return product + z;
}
/* Signed widening multiply-add: X and Y are multiplied as 128-bit
   signed values and Z is added to the product.  noipa keeps
   interprocedural optimization away from this function.
   NOTE(review): the final addition is done in signed __int128, so a
   large enough Z can overflow it; confirm callers are fine with
   that.  */
__attribute__((noipa)) __int128
bar (long long x, long long y, __int128 z)
{
  __int128 wide_x = x;
  __int128 product = wide_x * y;

  return product + z;
}
/* Assemble a 64-bit value from three random () draws: the first
   supplies bits 0-30, the second bits 31-61 and the third bits 62-63.
   The three fields do not overlap, so OR-ing them is the same as
   summing them.  */
unsigned long long
baz (void)
{
  unsigned long long bits_0_30 = random () & 0x7fffffff;
  unsigned long long bits_31_61 = (random () & 0x7fffffffLL) << 31;
  unsigned long long bits_62_63 = (random () & 0x3ULL) << 62;

  return bits_0_30 | bits_31_61 | bits_62_63;
}
/* Build a 128-bit value from two baz () results: the first becomes
   the upper 64 bits, the second the lower 64 bits.  */
unsigned __int128
qux (void)
{
  unsigned __int128 upper = baz ();
  unsigned __int128 lower = baz ();

  return (upper << 64) | lower;
}
/* Compare foo () and bar () against the same multiply-add computed
   inline on volatile operands, printing every input triple for which
   the two results differ.  Always returns 0.  */
int
main ()
{
  enum { ROUNDS = 10000000 };
  int round;

  /* Unsigned variant: operands are raw 64-bit values from baz ().  */
  for (round = 0; round < ROUNDS; round++)
    {
      volatile unsigned __int128 x, y;
      unsigned __int128 z, got, want;

      x = baz ();
      y = baz ();
      z = qux ();
      got = foo (x, y, z);
      want = (x * y) + z;
      if (got == want)
        continue;
      __builtin_printf ("U 0x%016llx * 0x%016llx + 0x%016llx%016llx\n",
                        (unsigned long long) x,
                        (unsigned long long) y,
                        (unsigned long long) (z >> 64),
                        (unsigned long long) z);
    }

  /* Signed variant: operands go through long long first, so x and y
     hold the sign-extended 64-bit values.  */
  for (round = 0; round < ROUNDS; round++)
    {
      volatile unsigned __int128 x, y;
      unsigned __int128 z, got, want;

      x = (long long) baz ();
      y = (long long) baz ();
      z = qux ();
      got = bar (x, y, z);
      want = (x * y) + z;
      if (got == want)
        continue;
      __builtin_printf ("S 0x%016llx * 0x%016llx + 0x%016llx%016llx\n",
                        (unsigned long long) x,
                        (unsigned long long) y,
                        (unsigned long long) (z >> 64),
                        (unsigned long long) z);
    }

  return 0;
}
shows that while it is correct for umaddditi4, it is not correct for maddditi4.
Example of bar arguments which result in different result:
0xffffffffffffffff834a97f995de5fd5 * 0xffffffffffffffff878d5777da196ad2 +
0x630036472f469716e5be2424d91183d8
which computes 0x9dad19ebe2fba1e2351c16459af75292 but should compute
0x9dad19ebe2fba1e3351c16459af75292 instead.
In fact, the incorrect signed results are exactly all those where z has bit
0x8000000000000000ULL set and the result is ((unsigned __int128) 1) << 64
smaller than it should be in that case.
Segher, is it worth adding something more complicated for the maddditi4 case or
shall we just drop maddditi4 and only support umaddditi4?