On OpenBSD 5.1/SPARC, I'm seeing this test failure:

  test-fma2.h:93: assertion failed
  FAIL: test-fmal2
It turns out that gcc has miscompiled the rpl_fmal function in lib/fmal.c.
More precisely, this code

  fsum += (DOUBLE)
          ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
           | (sum[sum_len - k - 2] >> shift));

in a situation where the parenthesized expression would evaluate to 2^32
with 64-bit computation, and therefore to 0 when truncated to 'unsigned int'
(the sum[] array element type is 'unsigned int'), increments fsum by
262144.0L. It happens even without optimization.

(gdb) print shift
$38 = 1
(gdb) print fsum
$39 = 0
(gdb) next
876       fsum += (DOUBLE)
(gdb) print ( sum[sum_len - k - 1] << 31 ) | (sum[sum_len - k - 2] >> 1)
$40 = 4294967296
(gdb) print (unsigned int)(( sum[sum_len - k - 1] << 31 ) | (sum[sum_len - k - 2] >> 1))
$41 = 0
(gdb) next
873       while (--k >= 0)
(gdb) print fsum
$42 = 262144


2012-03-15  Bruno Haible  <br...@clisp.org>

        fmal: Avoid test failure on OpenBSD 5.1/SPARC64.
        * lib/fma.c (VOLATILE): New macro.
        (FUNC): Use it to work around a GCC compiler bug.

--- lib/fma.c.orig Thu Mar 15 12:09:33 2012
+++ lib/fma.c Thu Mar 15 12:06:55 2012
@@ -72,6 +72,13 @@
 # pragma fenv_access (off)
 #endif
 
+/* Work around GCC 4.2.1 bug on OpenBSD 5.1/SPARC64. */
+#if defined __GNUC__ && defined __sparc__
+# define VOLATILE volatile
+#else
+# define VOLATILE
+#endif
+
 /* It is possible to write an implementation of fused multiply-add with
    floating-point operations alone. See
      Sylvie Boldo, Guillaume Melquiond:
@@ -866,16 +873,22 @@
               else
                 {
                   /* First loop round. */
-                  fsum = (DOUBLE)
-                         ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
-                          | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0));
+                  {
+                    VOLATILE mp_limb_t chunk =
+                      (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
+                      | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0);
+                    fsum = (DOUBLE) chunk;
+                  }
                   /* General loop. */
                   while (--k >= 0)
                     {
                       fsum *= chunk_multiplier;
-                      fsum += (DOUBLE)
-                              ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
-                               | (sum[sum_len - k - 2] >> shift));
+                      {
+                        VOLATILE mp_limb_t chunk =
+                          (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
+                          | (sum[sum_len - k - 2] >> shift);
+                        fsum += (DOUBLE) chunk;
+                      }
                     }
                 }
             }