The additional unit tests for fmod() and fmodl() revealed that gnulib's replacement algorithm returned wrong results when the quotient of the two arguments was large. For example: fmod (1e30, 1.5) In this case the result was not only wrong but also out-of-range.
A complete rewrite is necessary to fix it. 2012-03-04 Bruno Haible <br...@clisp.org> fmod, fmodl: Fix computation for large quotients x / y. * lib/fmod.c: Completely rewritten. * lib/fmodl.c (fmodl): Use implementation of fmod.c with USE_LONG_DOUBLE. * modules/fmod (Depends-on): Add isfinite, signbit, fabs, frexp, ldexp, isnand. Remove fma. * modules/fmodl (Depends-on): Add float, isfinite, signbit, fabsl, frexpl, ldexpl, isnanl. Remove fma. * m4/fmod.m4 (gl_FUNC_FMOD): Update computation of FMOD_LIBM. * m4/fmodl.m4 (gl_FUNC_FMODL): Update computation of FMODL_LIBM. ================================= lib/fmod.c ================================= /* Remainder. Copyright (C) 2011-2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #if ! defined USE_LONG_DOUBLE # include <config.h> #endif /* Specification. */ #include <math.h> #include <float.h> #include <stdlib.h> #ifdef USE_LONG_DOUBLE # define FMOD fmodl # define DOUBLE long double # define MANT_DIG LDBL_MANT_DIG # define L_(literal) literal##L # define FREXP frexpl # define LDEXP ldexpl # define FABS fabsl # define TRUNC truncl # define ISNAN isnanl #else # define FMOD fmod # define DOUBLE double # define MANT_DIG DBL_MANT_DIG # define L_(literal) literal # define FREXP frexp # define LDEXP ldexp # define FABS fabs # define TRUNC trunc # define ISNAN isnand #endif /* MSVC with option -fp:strict refuses to compile constant initializers that contain floating-point operations. Pacify this compiler. */ #ifdef _MSC_VER # pragma fenv_access (off) #endif #undef NAN #if defined _MSC_VER static DOUBLE zero; # define NAN (zero / zero) #else # define NAN (L_(0.0) / L_(0.0)) #endif /* To avoid rounding errors during the computation of x - q * y, there are three possibilities: - Use fma (- q, y, x). - Have q be a single bit at a time, and compute x - q * y through a subtraction. - Have q be at most MANT_DIG/2 bits at a time, and compute x - q * y by splitting y into two halves: y = y1 * 2^(MANT_DIG/2) + y0 x - q * y = (x - q * y1 * 2^MANT_DIG/2) - q * y0. The latter is probably the most efficient. */ /* Number of bits in a limb. */ #define LIMB_BITS (MANT_DIG / 2) /* 2^LIMB_BITS. */ static const DOUBLE TWO_LIMB_BITS = /* Assume LIMB_BITS <= 3 * 31. Use the identity n = floor(n/3) + floor((n+1)/3) + floor((n+2)/3). */ (DOUBLE) (1U << (LIMB_BITS / 3)) * (DOUBLE) (1U << ((LIMB_BITS + 1) / 3)) * (DOUBLE) (1U << ((LIMB_BITS + 2) / 3)); /* 2^-LIMB_BITS. */ static const DOUBLE TWO_LIMB_BITS_INVERSE = /* Assume LIMB_BITS <= 3 * 31. Use the identity n = floor(n/3) + floor((n+1)/3) + floor((n+2)/3). */ L_(1.0) / ((DOUBLE) (1U << (LIMB_BITS / 3)) * (DOUBLE) (1U << ((LIMB_BITS + 1) / 3)) * (DOUBLE) (1U << ((LIMB_BITS + 2) / 3))); DOUBLE FMOD (DOUBLE x, DOUBLE y) { if (isfinite (x) && isfinite (y) && y != L_(0.0)) { if (x == L_(0.0)) /* Return x, regardless of the sign of y. */ return x; { int negate = ((!signbit (x)) ^ (!signbit (y))); /* Take the absolute value of x and y. */ x = FABS (x); y = FABS (y); /* Trivial case that requires no computation. */ if (x < y) return (negate ? - x : x); { int yexp; DOUBLE ym; DOUBLE y1; DOUBLE y0; int k; DOUBLE x2; DOUBLE x1; DOUBLE x0; /* Write y = 2^yexp * (y1 * 2^-LIMB_BITS + y0 * 2^-(2*LIMB_BITS)) where y1 is an integer, 2^(LIMB_BITS-1) <= y1 < 2^LIMB_BITS, y1 has at most LIMB_BITS bits, 0 <= y0 < 2^LIMB_BITS, y0 has at most (MANT_DIG + 1) / 2 bits. */ ym = FREXP (y, &yexp); ym = ym * TWO_LIMB_BITS; y1 = TRUNC (ym); y0 = (ym - y1) * TWO_LIMB_BITS; /* Write x = 2^(yexp+(k-3)*LIMB_BITS) * (x2 * 2^(2*LIMB_BITS) + x1 * 2^LIMB_BITS + x0) where x2, x1, x0 are each integers >= 0, < 2^LIMB_BITS. */ { int xexp; DOUBLE xm = FREXP (x, &xexp); /* Since we know x >= y, we know xexp >= yexp. */ xexp -= yexp; /* Compute k = ceil(xexp / LIMB_BITS). */ k = (xexp + LIMB_BITS - 1) / LIMB_BITS; /* Note: (k - 1) * LIMB_BITS + 1 <= xexp <= k * LIMB_BITS. */ /* Note: 0.5 <= xm < 1.0. */ xm = LDEXP (xm, xexp - (k - 1) * LIMB_BITS); /* Note: Now xm < 2^(xexp - (k - 1) * LIMB_BITS) <= 2^LIMB_BITS and xm >= 0.5 * 2^(xexp - (k - 1) * LIMB_BITS) >= 1.0 and xm has at most MANT_DIG <= 2*LIMB_BITS+1 bits. */ x2 = TRUNC (xm); x1 = (xm - x2) * TWO_LIMB_BITS; /* Split off x0 from x1 later. */ } /* Test whether [x2,x1,0] >= 2^LIMB_BITS * [y1,y0]. */ if (x2 > y1 || (x2 == y1 && x1 >= y0)) { /* Subtract 2^LIMB_BITS * [y1,y0] from [x2,x1,0]. */ x2 -= y1; x1 -= y0; if (x1 < L_(0.0)) { if (!(x2 >= L_(1.0))) abort (); x2 -= L_(1.0); x1 += TWO_LIMB_BITS; } } /* Split off x0 from x1. */ { DOUBLE x1int = TRUNC (x1); x0 = TRUNC ((x1 - x1int) * TWO_LIMB_BITS); x1 = x1int; } for (; k > 0; k--) { /* Multiprecision division of the limb sequence [x2,x1,x0] by [y1,y0]. */ /* Here [x2,x1,x0] < 2^LIMB_BITS * [y1,y0]. */ /* The first guess takes into account only [x2,x1] and [y1]. By Knuth's theorem, we know that q* = min (floor ([x2,x1] / [y1]), 2^LIMB_BITS - 1) and q = floor ([x2,x1,x0] / [y1,y0]) are not far away: q* - 2 <= q <= q* + 1. Proof: a) q* * y1 <= floor ([x2,x1] / [y1]) * y1 <= [x2,x1]. Hence [x2,x1,x0] - q* * [y1,y0] = 2^LIMB_BITS * ([x2,x1] - q* * [y1]) + x0 - q* * y0 >= x0 - q* * y0 >= - q* * y0 > - 2^(2*LIMB_BITS) >= - 2 * [y1,y0] So [x2,x1,x0] > (q* - 2) * [y1,y0]. b) If q* = floor ([x2,x1] / [y1]), then [x2,x1] < (q* + 1) * y1 Hence [x2,x1,x0] - q* * [y1,y0] = 2^LIMB_BITS * ([x2,x1] - q* * [y1]) + x0 - q* * y0 <= 2^LIMB_BITS * (y1 - 1) + x0 - q* * y0 <= 2^LIMB_BITS * (2^LIMB_BITS-2) + (2^LIMB_BITS-1) - 0 < 2^(2*LIMB_BITS) <= 2 * [y1,y0] So [x2,x1,x0] < (q* + 2) * [y1,y0]. and so q < q* + 2 which implies q <= q* + 1. In the other case, q* = 2^LIMB_BITS - 1. Then trivially q < 2^LIMB_BITS = q* + 1. We know that floor ([x2,x1] / [y1]) >= 2^LIMB_BITS if and only if x2 >= y1. */ DOUBLE q = (x2 >= y1 ? TWO_LIMB_BITS - L_(1.0) : TRUNC ((x2 * TWO_LIMB_BITS + x1) / y1)); if (q > L_(0.0)) { /* Compute [x2,x1,x0] - q* * [y1,y0] = 2^LIMB_BITS * ([x2,x1] - q* * [y1]) + x0 - q* * y0. */ DOUBLE q_y1 = q * y1; /* exact, at most 2*LIMB_BITS bits */ DOUBLE q_y1_1 = TRUNC (q_y1 * TWO_LIMB_BITS_INVERSE); DOUBLE q_y1_0 = q_y1 - q_y1_1 * TWO_LIMB_BITS; DOUBLE q_y0 = q * y0; /* exact, at most MANT_DIG bits */ DOUBLE q_y0_1 = TRUNC (q_y0 * TWO_LIMB_BITS_INVERSE); DOUBLE q_y0_0 = q_y0 - q_y0_1 * TWO_LIMB_BITS; x2 -= q_y1_1; x1 -= q_y1_0; x1 -= q_y0_1; x0 -= q_y0_0; /* Move negative carry from x0 to x1 and from x1 to x2. */ if (x0 < L_(0.0)) { x0 += TWO_LIMB_BITS; x1 -= L_(1.0); } if (x1 < L_(0.0)) { x1 += TWO_LIMB_BITS; x2 -= L_(1.0); if (x1 < L_(0.0)) /* not sure this can happen */ { x1 += TWO_LIMB_BITS; x2 -= L_(1.0); } } if (x2 < L_(0.0)) { /* Reduce q by 1. */ x1 += y1; x0 += y0; /* Move overflow from x0 to x1 and from x1 to x0. */ if (x0 >= TWO_LIMB_BITS) { x0 -= TWO_LIMB_BITS; x1 += L_(1.0); } if (x1 >= TWO_LIMB_BITS) { x1 -= TWO_LIMB_BITS; x2 += L_(1.0); } if (x2 < L_(0.0)) { /* Reduce q by 1 again. */ x1 += y1; x0 += y0; /* Move overflow from x0 to x1 and from x1 to x0. */ if (x0 >= TWO_LIMB_BITS) { x0 -= TWO_LIMB_BITS; x1 += L_(1.0); } if (x1 >= TWO_LIMB_BITS) { x1 -= TWO_LIMB_BITS; x2 += L_(1.0); } if (x2 < L_(0.0)) /* Shouldn't happen, because we proved that q >= q* - 2. */ abort (); } } } if (x2 > L_(0.0) || x1 > y1 || (x1 == y1 && x0 >= y0)) { /* Increase q by 1. */ x1 -= y1; x0 -= y0; /* Move negative carry from x0 to x1 and from x1 to x2. */ if (x0 < L_(0.0)) { x0 += TWO_LIMB_BITS; x1 -= L_(1.0); } if (x1 < L_(0.0)) { x1 += TWO_LIMB_BITS; x2 -= L_(1.0); } if (x2 < L_(0.0)) abort (); if (x2 > L_(0.0) || x1 > y1 || (x1 == y1 && x0 >= y0)) /* Shouldn't happen, because we proved that q <= q* + 1. */ abort (); } /* Here [x2,x1,x0] < [y1,y0]. */ /* Next round. */ x2 = x1; #if (MANT_DIG + 1) / 2 > LIMB_BITS /* y0 can have a fractional bit */ x1 = TRUNC (x0); x0 = (x0 - x1) * TWO_LIMB_BITS; #else x1 = x0; x0 = L_(0.0); #endif /* Here [x2,x1,x0] < 2^LIMB_BITS * [y1,y0]. */ } /* Here k = 0. The result is 2^(yexp-3*LIMB_BITS) * (x2 * 2^(2*LIMB_BITS) + x1 * 2^LIMB_BITS + x0). */ { DOUBLE r = LDEXP ((x2 * TWO_LIMB_BITS + x1) * TWO_LIMB_BITS + x0, yexp - 3 * LIMB_BITS); return (negate ? - r : r); } } } } else { if (ISNAN (x) || ISNAN (y)) return x + y; /* NaN */ else if (isinf (y)) return x; else /* x infinite or y zero */ return NAN; } } ============================================================================== --- lib/fmodl.c.orig Sun Mar 4 20:46:31 2012 +++ lib/fmodl.c Sun Mar 4 13:28:51 2012 @@ -29,56 +29,7 @@ #else -long double -fmodl (long double x, long double y) -{ - if (isinf (y)) - return x; - else - { - long double q = - truncl (x / y); - long double r = fmal (q, y, x); /* = x + q * y, computed in one step */ - /* Correct possible rounding errors in the quotient x / y. */ - if (y >= 0) - { - if (x >= 0) - { - /* Expect 0 <= r < y. */ - if (r < 0) - q += 1, r = fmal (q, y, x); - else if (r >= y) - q -= 1, r = fmal (q, y, x); - } - else - { - /* Expect - y < r <= 0. */ - if (r > 0) - q -= 1, r = fmal (q, y, x); - else if (r <= - y) - q += 1, r = fmal (q, y, x); - } - } - else - { - if (x >= 0) - { - /* Expect 0 <= r < - y. */ - if (r < 0) - q -= 1, r = fmal (q, y, x); - else if (r >= - y) - q += 1, r = fmal (q, y, x); - } - else - { - /* Expect y < r <= 0. */ - if (r > 0) - q += 1, r = fmal (q, y, x); - else if (r <= y) - q -= 1, r = fmal (q, y, x); - } - } - return r; - } -} +# define USE_LONG_DOUBLE +# include "fmod.c" #endif --- m4/fmod.m4.orig Sun Mar 4 20:46:31 2012 +++ m4/fmod.m4 Sun Mar 4 18:22:03 2012 @@ -1,4 +1,4 @@ -# fmod.m4 serial 2 +# fmod.m4 serial 3 dnl Copyright (C) 2011-2012 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -67,18 +67,36 @@ ]) if test $REPLACE_FMOD = 1; then dnl Find libraries needed to link lib/fmod.c. + AC_REQUIRE([gl_FUNC_FABS]) + AC_REQUIRE([gl_FUNC_FREXP]) AC_REQUIRE([gl_FUNC_TRUNC]) - AC_REQUIRE([gl_FUNC_FMA]) + AC_REQUIRE([gl_FUNC_LDEXP]) + AC_REQUIRE([gl_FUNC_ISNAND]) FMOD_LIBM= + dnl Append $FABS_LIBM to FMOD_LIBM, avoiding gratuitous duplicates. + case " $FMOD_LIBM " in + *" $FABS_LIBM "*) ;; + *) FMOD_LIBM="$FMOD_LIBM $FABS_LIBM" ;; + esac + dnl Append $FREXP_LIBM to FMOD_LIBM, avoiding gratuitous duplicates. + case " $FMOD_LIBM " in + *" $FREXP_LIBM "*) ;; + *) FMOD_LIBM="$FMOD_LIBM $FREXP_LIBM" ;; + esac dnl Append $TRUNC_LIBM to FMOD_LIBM, avoiding gratuitous duplicates. case " $FMOD_LIBM " in *" $TRUNC_LIBM "*) ;; *) FMOD_LIBM="$FMOD_LIBM $TRUNC_LIBM" ;; esac - dnl Append $FMA_LIBM to FMOD_LIBM, avoiding gratuitous duplicates. + dnl Append $LDEXP_LIBM to FMOD_LIBM, avoiding gratuitous duplicates. + case " $FMOD_LIBM " in + *" $LDEXP_LIBM "*) ;; + *) FMOD_LIBM="$FMOD_LIBM $LDEXP_LIBM" ;; + esac + dnl Append $ISNAND_LIBM to FMOD_LIBM, avoiding gratuitous duplicates. case " $FMOD_LIBM " in - *" $FMA_LIBM "*) ;; - *) FMOD_LIBM="$FMOD_LIBM $FMA_LIBM" ;; + *" $ISNAND_LIBM "*) ;; + *) FMOD_LIBM="$FMOD_LIBM $ISNAND_LIBM" ;; esac fi ]) --- m4/fmodl.m4.orig Sun Mar 4 20:46:31 2012 +++ m4/fmodl.m4 Sun Mar 4 18:22:01 2012 @@ -1,4 +1,4 @@ -# fmodl.m4 serial 2 +# fmodl.m4 serial 3 dnl Copyright (C) 2011-2012 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -82,18 +82,36 @@ if test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 1; then FMODL_LIBM="$FMOD_LIBM" else + AC_REQUIRE([gl_FUNC_FABSL]) + AC_REQUIRE([gl_FUNC_FREXPL]) AC_REQUIRE([gl_FUNC_TRUNCL]) - AC_REQUIRE([gl_FUNC_FMAL]) + AC_REQUIRE([gl_FUNC_LDEXPL]) + AC_REQUIRE([gl_FUNC_ISNANL]) FMODL_LIBM= + dnl Append $FABSL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates. + case " $FMODL_LIBM " in + *" $FABSL_LIBM "*) ;; + *) FMODL_LIBM="$FMODL_LIBM $FABSL_LIBM" ;; + esac + dnl Append $FREXPL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates. + case " $FMODL_LIBM " in + *" $FREXPL_LIBM "*) ;; + *) FMODL_LIBM="$FMODL_LIBM $FREXPL_LIBM" ;; + esac dnl Append $TRUNCL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates. case " $FMODL_LIBM " in *" $TRUNCL_LIBM "*) ;; *) FMODL_LIBM="$FMODL_LIBM $TRUNCL_LIBM" ;; esac - dnl Append $FMAL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates. + dnl Append $LDEXPL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates. + case " $FMODL_LIBM " in + *" $LDEXPL_LIBM "*) ;; + *) FMODL_LIBM="$FMODL_LIBM $LDEXPL_LIBM" ;; + esac + dnl Append $ISNANL_LIBM to FMODL_LIBM, avoiding gratuitous duplicates. case " $FMODL_LIBM " in - *" $FMAL_LIBM "*) ;; - *) FMODL_LIBM="$FMODL_LIBM $FMAL_LIBM" ;; + *" $ISNANL_LIBM "*) ;; + *) FMODL_LIBM="$FMODL_LIBM $ISNANL_LIBM" ;; esac fi fi --- modules/fmod.orig Sun Mar 4 20:46:31 2012 +++ modules/fmod Sun Mar 4 18:10:10 2012 @@ -8,9 +8,14 @@ Depends-on: math -isinf [test $REPLACE_FMOD = 1] +isfinite [test $REPLACE_FMOD = 1] +signbit [test $REPLACE_FMOD = 1] +fabs [test $REPLACE_FMOD = 1] +frexp [test $REPLACE_FMOD = 1] trunc [test $REPLACE_FMOD = 1] -fma [test $REPLACE_FMOD = 1] +ldexp [test $REPLACE_FMOD = 1] +isnand [test $REPLACE_FMOD = 1] +isinf [test $REPLACE_FMOD = 1] configure.ac: gl_FUNC_FMOD --- modules/fmodl.orig Sun Mar 4 20:46:31 2012 +++ modules/fmodl Sun Mar 4 18:27:21 2012 @@ -9,9 +9,15 @@ Depends-on: math fmod [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 1] -isinf [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +float [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +isfinite [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +signbit [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +fabsl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +frexpl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] truncl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] -fmal [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +ldexpl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +isnanl [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] +isinf [{ test $HAVE_FMODL = 0 || test $REPLACE_FMODL = 1; } && test $HAVE_SAME_LONG_DOUBLE_AS_DOUBLE = 0] configure.ac: gl_FUNC_FMODL