Hi,

It was recently brought to my attention that glibc needs access to complex multiply and divide for IEEE 128-bit floating-point in GCC 6.2 in order to move ahead with the library implementation work. This patch enables this support using only target-specific changes to avoid any possible effect on other targets. This is not the correct long-term approach, and I am working on a patch that instead makes use of the common infrastructure. The plan is to use the current patch for GCC 6, and replace it with the common-infrastructure approach in GCC 7 shortly.
Thus this patch copies the common code for complex multiply and divide out of libgcc2.c into separate Power-specific files, and specializes it for the KC type. It adds a couple of straightforward tests to verify that the approach works. I've tested the code generated for these tests on a POWER9 simulator as well as a POWER8 machine. Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions. I've also asked the glibc team to verify that this serves their requirements. Is this ok for trunk, and for gcc-6-branch after a short burn-in period? Thanks, Bill [libgcc] 2016-07-11 Bill Schmidt <wschm...@linux.vnet.ibm.com> * config/rs6000/_divkc3.c: New. * config/rs6000/_mulkc3.c: New. * config/rs6000/quad-float128.h: Define TCtype; declare __mulkc3 and __divkc3. * config/rs6000/t-float128: Add _mulkc3 and _divkc3 to fp128_ppc_funcs. [gcc/testsuite] 2016-07-11 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.target/powerpc/divkc3-1.c: New. * gcc.target/powerpc/mulkc3-1.c: New. Index: gcc/testsuite/gcc.target/powerpc/divkc3-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/divkc3-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/divkc3-1.c (working copy) @@ -0,0 +1,22 @@ +/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */ +/* { dg-options "-mfloat128 -mvsx" } */ + +void abort (); + +typedef __complex float __cfloat128 __attribute__((mode(KC))); + +__cfloat128 divide (__cfloat128 x, __cfloat128 y) +{ + return x / y; +} + +__cfloat128 z, a; + +int main () +{ + z = divide (5.0q + 5.0jq, 2.0q + 1.0jq); + a = 3.0q + 1.0jq; + if (z != a) + abort (); + return 0; +} Index: gcc/testsuite/gcc.target/powerpc/mulkc3-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/mulkc3-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/mulkc3-1.c (working copy) @@ -0,0 +1,22 @@ +/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */ +/* { 
dg-options "-mfloat128 -mvsx" } */ + +void abort (); + +typedef __complex float __cfloat128 __attribute__((mode(KC))); + +__cfloat128 multiply (__cfloat128 x, __cfloat128 y) +{ + return x * y; +} + +__cfloat128 z, a; + +int main () +{ + z = multiply (2.0q + 1.0jq, 3.0q + 1.0jq); + a = 5.0q + 5.0jq; + if (z != a) + abort (); + return 0; +} Index: libgcc/config/rs6000/_divkc3.c =================================================================== --- libgcc/config/rs6000/_divkc3.c (revision 0) +++ libgcc/config/rs6000/_divkc3.c (working copy) @@ -0,0 +1,64 @@ +typedef float KFtype __attribute__ ((mode (KF))); +typedef __complex float KCtype __attribute__ ((mode (KC))); + +#define COPYSIGN(x,y) __builtin_copysignq (x, y) +#define INFINITY __builtin_infq () +#define FABS __builtin_fabsq +#define isnan __builtin_isnan +#define isinf __builtin_isinf +#define isfinite __builtin_isfinite + +KCtype +__divkc3 (KFtype a, KFtype b, KFtype c, KFtype d) +{ + KFtype denom, ratio, x, y; + KCtype res; + + /* ??? We can get better behavior from logarithmic scaling instead of + the division. But that would mean starting to link libgcc against + libm. We could implement something akin to ldexp/frexp as gcc builtins + fairly easily... */ + if (FABS (c) < FABS (d)) + { + ratio = c / d; + denom = (c * ratio) + d; + x = ((a * ratio) + b) / denom; + y = ((b * ratio) - a) / denom; + } + else + { + ratio = d / c; + denom = (d * ratio) + c; + x = ((b * ratio) + a) / denom; + y = (b - (a * ratio)) / denom; + } + + /* Recover infinities and zeros that computed as NaN+iNaN; the only cases + are nonzero/zero, infinite/finite, and finite/infinite. */ + if (isnan (x) && isnan (y)) + { + if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b))) + { + x = COPYSIGN (INFINITY, c) * a; + y = COPYSIGN (INFINITY, c) * b; + } + else if ((isinf (a) || isinf (b)) && isfinite (c) && isfinite (d)) + { + a = COPYSIGN (isinf (a) ? 1 : 0, a); + b = COPYSIGN (isinf (b) ? 
1 : 0, b); + x = INFINITY * (a * c + b * d); + y = INFINITY * (b * c - a * d); + } + else if ((isinf (c) || isinf (d)) && isfinite (a) && isfinite (b)) + { + c = COPYSIGN (isinf (c) ? 1 : 0, c); + d = COPYSIGN (isinf (d) ? 1 : 0, d); + x = 0.0 * (a * c + b * d); + y = 0.0 * (b * c - a * d); + } + } + + __real__ res = x; + __imag__ res = y; + return res; +} Index: libgcc/config/rs6000/_mulkc3.c =================================================================== --- libgcc/config/rs6000/_mulkc3.c (revision 0) +++ libgcc/config/rs6000/_mulkc3.c (working copy) @@ -0,0 +1,69 @@ +typedef float KFtype __attribute__ ((mode (KF))); +typedef __complex float KCtype __attribute__ ((mode (KC))); + +#define COPYSIGN(x,y) __builtin_copysignq (x, y) +#define INFINITY __builtin_infq () +#define isnan __builtin_isnan +#define isinf __builtin_isinf + +KCtype +__mulkc3 (KFtype a, KFtype b, KFtype c, KFtype d) +{ + KFtype ac, bd, ad, bc, x, y; + KCtype res; + + ac = a * c; + bd = b * d; + ad = a * d; + bc = b * c; + + x = ac - bd; + y = ad + bc; + + if (isnan (x) && isnan (y)) + { + /* Recover infinities that computed as NaN + iNaN. */ + _Bool recalc = 0; + if (isinf (a) || isinf (b)) + { + /* z is infinite. "Box" the infinity and change NaNs in + the other factor to 0. */ + a = COPYSIGN (isinf (a) ? 1 : 0, a); + b = COPYSIGN (isinf (b) ? 1 : 0, b); + if (isnan (c)) c = COPYSIGN (0, c); + if (isnan (d)) d = COPYSIGN (0, d); + recalc = 1; + } + if (isinf (c) || isinf (d)) + { + /* w is infinite. "Box" the infinity and change NaNs in + the other factor to 0. */ + c = COPYSIGN (isinf (c) ? 1 : 0, c); + d = COPYSIGN (isinf (d) ? 1 : 0, d); + if (isnan (a)) a = COPYSIGN (0, a); + if (isnan (b)) b = COPYSIGN (0, b); + recalc = 1; + } + if (!recalc + && (isinf (ac) || isinf (bd) + || isinf (ad) || isinf (bc))) + { + /* Recover infinities from overflow by changing NaNs to 0. 
*/ + if (isnan (a)) a = COPYSIGN (0, a); + if (isnan (b)) b = COPYSIGN (0, b); + if (isnan (c)) c = COPYSIGN (0, c); + if (isnan (d)) d = COPYSIGN (0, d); + recalc = 1; + } + if (recalc) + { + x = INFINITY * (a * c - b * d); + y = INFINITY * (a * d + b * c); + } + } + + __real__ res = x; + __imag__ res = y; + return res; +} + Index: libgcc/config/rs6000/quad-float128.h =================================================================== --- libgcc/config/rs6000/quad-float128.h (revision 238213) +++ libgcc/config/rs6000/quad-float128.h (working copy) @@ -33,6 +33,10 @@ This define forces it to use KFmode (aka, ieee 128-bit floating point). */ #define TF KF +/* We also need TCtype to represent complex ieee 128-bit float for + __mulkc3 and __divkc3. */ +typedef __complex float TCtype __attribute__ ((mode (KC))); + /* Force the use of the VSX instruction set. */ #if defined(_ARCH_PPC) && (!defined(__VSX__) || !defined(__FLOAT128__)) #pragma GCC target ("vsx,float128") @@ -154,6 +158,10 @@ extern TFtype __floatundikf (UDItype_ppc); extern IBM128_TYPE __extendkftf2 (TFtype); extern TFtype __trunctfkf2 (IBM128_TYPE); +/* Complex __float128 built on __float128 interfaces. */ +extern TCtype __mulkc3 (TFtype, TFtype, TFtype, TFtype); +extern TCtype __divkc3 (TFtype, TFtype, TFtype, TFtype); + /* Implementation of conversions between __ibm128 and __float128, to allow the same code to be used on systems with IEEE 128-bit emulation and with IEEE 128-bit hardware support. 
*/ Index: libgcc/config/rs6000/t-float128 =================================================================== --- libgcc/config/rs6000/t-float128 (revision 238213) +++ libgcc/config/rs6000/t-float128 (working copy) @@ -25,7 +25,7 @@ fp128_softfp_obj = $(fp128_softfp_static_obj) $(fp # New functions for software emulation fp128_ppc_funcs = floattikf floatuntikf fixkfti fixunskfti \ extendkftf2-sw trunctfkf2-sw \ - sfp-exceptions + sfp-exceptions _mulkc3 _divkc3 fp128_ppc_src = $(addprefix $(srcdir)/config/rs6000/,$(addsuffix \ .c,$(fp128_ppc_funcs)))