This patch fixes various cases of spurious overflow exceptions in the IBM long double support code. The generic issue is that an initial approximation is computed by using the relevant arithmetic operation on the high parts of the operands - but this may overflow double in some cases where the final result is large but still a long way (up to around 2^53 ulp) from overflowing long double. For division overflow could occur not just from the initial a / c division but also from the subsequent multiplication of the result by c (in some cases where a is DBL_MAX, say), when the final result of the division need not be large at all.
__gcc_qadd already tried to handle such overflow cases, but detected them by examining the result of the addition of high parts - which leaves a spurious overflow exception raised even if it returns the correct non-overflowing value. This patch instead checks the operands and does appropriate scaling, in all of __gcc_qadd, __gcc_qmul and __gcc_qdiv, to avoid spurious overflow exceptions arising as well as avoiding the bad results arising from such overflows. Tested with no regressions with cross to powerpc-linux-gnu (and also ran the glibc libm tests, which provide a rather more thorough test of floating-point arithmetic than the GCC testsuite; this patch fixes, at least, bad results from cbrtl (LDBL_MAX) that arose from the second division issue mentioned, as well as the specific cases shown in the tests added to the GCC testsuite). OK to commit? libgcc: 2014-01-04 Joseph Myers <jos...@codesourcery.com> * config/rs6000/ibm-ldouble.c (bool): New typedef. (false, true): New macros. (__gcc_qadd): Detect possible overflow before rather than after possibly overflowing arithmetic and scale operands and results in that case. (__gcc_qmul, __gcc_qdiv): Detect possible overflow and scale operands and results in that case. gcc/testsuite: 2014-01-04 Joseph Myers <jos...@codesourcery.com> * gcc.target/powerpc/rs6000-ldouble-4.c, gcc.target/powerpc/rs6000-ldouble-5.c, gcc.target/powerpc/rs6000-ldouble-6.c, gcc.target/powerpc/rs6000-ldouble-7.c: New tests. Index: libgcc/config/rs6000/ibm-ldouble.c =================================================================== --- libgcc/config/rs6000/ibm-ldouble.c (revision 206325) +++ libgcc/config/rs6000/ibm-ldouble.c (working copy) @@ -47,6 +47,10 @@ see the files COPYING3 and COPYING.RUNTIME respect #if defined (__MACH__) || defined (__powerpc__) || defined (_AIX) +typedef _Bool bool; +#define false 0 +#define true 1 + #define fabs(x) __builtin_fabs(x) #define isless(x, y) __builtin_isless (x, y) #define inf() __builtin_inf() @@ -98,40 +102,47 @@ long double __gcc_qadd (double a, double aa, double c, double cc) { longDblUnion x; - double z, q, zz, xh; + double z, q, zz, xh, xhs; + bool scale = false; - z = a + c; + if (nonfinite (a) || nonfinite (c)) + return a + c; - if (nonfinite (z)) + if (fabs (a) >= 0x1p1022 || fabs (c) >= 0x1p1022) { - if (fabs (z) != inf()) - return z; - z = cc + aa + c + a; - if (nonfinite (z)) - return z; - x.dval[0] = z; /* Will always be DBL_MAX. */ - zz = aa + cc; - if (fabs(a) > fabs(c)) - x.dval[1] = a - z + c + zz; - else - x.dval[1] = c - z + a + zz; + scale = true; + if (fabs (a) >= 0x1p914) + { + a *= 0.5; + aa *= 0.5; + } + if (fabs (c) >= 0x1p914) + { + c *= 0.5; + cc *= 0.5; + } } - else - { - q = a - z; - zz = q + c + (a - (q + z)) + aa + cc; - /* Keep -0 result. */ - if (zz == 0.0) - return z; + z = a + c; + q = a - z; + zz = q + c + (a - (q + z)) + aa + cc; - xh = z + zz; - if (nonfinite (xh)) - return xh; + /* Keep -0 result. */ + if (zz == 0.0) + return (scale ? 2.0 * z : z); - x.dval[0] = xh; - x.dval[1] = z - xh + zz; - } + xh = z + zz; + xhs = xh; + if (scale) + xhs *= 2.0; + if (nonfinite (xhs)) + return xhs; + + x.dval[0] = xhs; + x.dval[1] = z - xh + zz; + if (scale) + x.dval[1] *= 2.0; + return x.ldval; } @@ -149,7 +160,24 @@ long double __gcc_qmul (double a, double b, double c, double d) { longDblUnion z; - double t, tau, u, v, w; + double t, tau, u, us, v, w; + bool scale = false; + + if (nonfinite (a) || nonfinite (c)) + return a * c; + + if (fabs (a) >= 0x1p512) + { + scale = true; + a *= 0.5; + b *= 0.5; + } + else if (fabs (c) >= 0x1p512) + { + scale = true; + c *= 0.5; + d *= 0.5; + } t = a * c; /* Highest order double term. */ @@ -169,12 +197,17 @@ __gcc_qmul (double a, double b, double c, double d w = b*c; tau += v + w; /* Add in other second-order terms. */ u = t + tau; + us = u; + if (scale) + us *= 2.0; /* Construct long double result. */ - if (nonfinite (u)) - return u; - z.dval[0] = u; + if (nonfinite (us)) + return us; + z.dval[0] = us; z.dval[1] = (t - u) + tau; + if (scale) + z.dval[1] *= 2.0; return z.ldval; } @@ -182,7 +215,24 @@ long double __gcc_qdiv (double a, double b, double c, double d) { longDblUnion z; - double s, sigma, t, tau, u, v, w; + double s, sigma, t, tau, u, us, v, w; + bool scale = false; + + if (nonfinite (a) || nonfinite (c)) + return a / c; + + if (fabs (a) >= 0x1p512) + { + scale = true; + a *= 0.5; + b *= 0.5; + } + else if (fabs (c) <= 0x1p-511) + { + scale = true; + c *= 2.0; + d *= 2.0; + } t = a / c; /* highest order double term */ @@ -215,12 +265,17 @@ __gcc_qdiv (double a, double b, double c, double d tau = ((v-sigma)+w)/c; /* Correction to t. */ u = t + tau; + us = u; + if (scale) + us *= 2.0; /* Construct long double result. */ - if (nonfinite (u)) - return u; - z.dval[0] = u; + if (nonfinite (us)) + return us; + z.dval[0] = us; z.dval[1] = (t - u) + tau; + if (scale) + z.dval[1] *= 2.0; return z.ldval; } Index: gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-4.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-4.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-4.c (revision 0) @@ -0,0 +1,30 @@ +/* Test long double addition overflow. */ +/* { dg-do run { target powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* rs6000-*-* } } */ +/* { dg-options "-mlong-double-128" } */ +/* { dg-additional-options "-DHAVE_FENV" { target fenv_exceptions } } */ + +#ifdef HAVE_FENV +#include <fenv.h> +#endif + +extern void exit (int); +extern void abort (void); + +volatile long double a = 0x1.ffffffffffffd8p1022L; +volatile long double b = 0x1.0000000000000ap1023L; +volatile long double r; +volatile long double expected = 0x1.fffffffffffff6p1023L; + +int +main (void) +{ + r = a + b; + /* Allow error up to 2 ulp. */ + if (__builtin_fabsl (r - expected) > 0x1p919L) + abort (); +#ifdef HAVE_FENV + if (fetestexcept (FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)) + abort (); +#endif + exit (0); +} Index: gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-5.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-5.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-5.c (revision 0) @@ -0,0 +1,30 @@ +/* Test long double multiplication overflow. */ +/* { dg-do run { target powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* rs6000-*-* } } */ +/* { dg-options "-mlong-double-128" } */ +/* { dg-additional-options "-DHAVE_FENV" { target fenv_exceptions } } */ + +#ifdef HAVE_FENV +#include <fenv.h> +#endif + +extern void exit (int); +extern void abort (void); + +volatile long double a = 0x1.fffffffffffff8p511L; +volatile long double b = 0x1.fffffffffffff8p511L; +volatile long double r; +volatile long double expected = 0x1.fffffffffffffp1023L; + +int +main (void) +{ + r = a * b; + /* Allow error up to 2 ulp. */ + if (__builtin_fabsl (r - expected) > 0x1p919L) + abort (); +#ifdef HAVE_FENV + if (fetestexcept (FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)) + abort (); +#endif + exit (0); +} Index: gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-6.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-6.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-6.c (revision 0) @@ -0,0 +1,31 @@ +/* Test long double division overflow (initial division of high + parts). */ +/* { dg-do run { target powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* rs6000-*-* } } */ +/* { dg-options "-mlong-double-128" } */ +/* { dg-additional-options "-DHAVE_FENV" { target fenv_exceptions } } */ + +#ifdef HAVE_FENV +#include <fenv.h> +#endif + +extern void exit (int); +extern void abort (void); + +volatile long double a = 0x1.7ffffffffffff8p512L; +volatile long double b = 0x1.80000000000008p-512L; +volatile long double r; +volatile long double expected = 0x1.ffffffffffffeaaaaaaaaaaaabp1023L; + +int +main (void) +{ + r = a / b; + /* Allow error up to 3 ulp. */ + if (__builtin_fabsl (r - expected) > 0x3p918L) + abort (); +#ifdef HAVE_FENV + if (fetestexcept (FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)) + abort (); +#endif + exit (0); +} Index: gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-7.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-7.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/rs6000-ldouble-7.c (revision 0) @@ -0,0 +1,31 @@ +/* Test long double division overflow (multiplication of initial + division result). */ +/* { dg-do run { target powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* rs6000-*-* } } */ +/* { dg-options "-mlong-double-128" } */ +/* { dg-additional-options "-DHAVE_FENV" { target fenv_exceptions } } */ + +#ifdef HAVE_FENV +#include <fenv.h> +#endif + +extern void exit (int); +extern void abort (void); + +volatile long double a = 0x1.fffffffffffffp1023L; +volatile long double b = 3.0L; +volatile long double r; +volatile long double expected = 0x1.5555555555554aaaaaaaaaaaaa8p+1022L; + +int +main (void) +{ + r = a / b; + /* Allow error up to 3 ulp. */ + if (__builtin_fabsl (r - expected) > 0x3p918L) + abort (); +#ifdef HAVE_FENV + if (fetestexcept (FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)) + abort (); +#endif + exit (0); +} -- Joseph S. Myers jos...@codesourcery.com