On OpenBSD 5.1/SPARC, I'm seeing this test failure:
test-fma2.h:93: assertion failed
FAIL: test-fmal2
It turns out that gcc has miscompiled the rpl_fmal function in lib/fmal.c.
More precisely, this code
fsum += (DOUBLE)
((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
| (sum[sum_len - k - 2] >> shift));
in a situation where the parenthesized expression would evaluate to
2^32 in a 64-bit computation, and therefore to 0 when truncated to
'unsigned int' (the element type of the sum[] array), increments
fsum by 262144.0L instead of 0. It happens even without optimization.
(gdb) print shift
$38 = 1
(gdb) print fsum
$39 = 0
(gdb) next
876 fsum += (DOUBLE)
(gdb) print ( sum[sum_len - k - 1] << 31 ) | (sum[sum_len - k - 2] >> 1)
$40 = 4294967296
(gdb) print (unsigned int)(( sum[sum_len - k - 1] << 31 ) | (sum[sum_len - k - 2] >> 1))
$41 = 0
(gdb) next
873 while (--k >= 0)
(gdb) print fsum
$42 = 262144
2012-03-15 Bruno Haible <[email protected]>
fmal: Avoid test failure on OpenBSD 5.1/SPARC64.
* lib/fma.c (VOLATILE): New macro.
(FUNC): Use it to work around a GCC compiler bug.
--- lib/fma.c.orig Thu Mar 15 12:09:33 2012
+++ lib/fma.c Thu Mar 15 12:06:55 2012
@@ -72,6 +72,13 @@
# pragma fenv_access (off)
#endif
+/* Work around GCC 4.2.1 bug on OpenBSD 5.1/SPARC64. */
+#if defined __GNUC__ && defined __sparc__
+# define VOLATILE volatile
+#else
+# define VOLATILE
+#endif
+
/* It is possible to write an implementation of fused multiply-add with
floating-point operations alone. See
Sylvie Boldo, Guillaume Melquiond:
@@ -866,16 +873,22 @@
else
{
/* First loop round. */
- fsum = (DOUBLE)
- ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
- | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0));
+ {
+ VOLATILE mp_limb_t chunk =
+ (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
+ | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0);
+ fsum = (DOUBLE) chunk;
+ }
/* General loop. */
while (--k >= 0)
{
fsum *= chunk_multiplier;
- fsum += (DOUBLE)
- ((sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
- | (sum[sum_len - k - 2] >> shift));
+ {
+ VOLATILE mp_limb_t chunk =
+ (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
+ | (sum[sum_len - k - 2] >> shift);
+ fsum += (DOUBLE) chunk;
+ }
}
}
}