[PATCH, rs6000, libgcc] Implement temporary solution for divkc3 and mulkc3

Bill Schmidt Mon, 11 Jul 2016 12:39:47 -0700

Hi,

It was recently brought to my attention that glibc needs access to complex
multiply and divide for IEEE-128 floating-point in GCC 6.2 in order to move
ahead with the library implementation work.  This patch enables this support
using only target-specific changes to avoid any possible effect on other
targets.  This is not the correct long-term approach, and I am working on a
patch that instead makes use of the common infrastructure.  The plan is to
use the current patch for GCC 6, and replace it with the other approach in
GCC 7 shortly.


Thus this patch copies the common code for complex multiply and divide out
of libgcc2.c into separate Power-specific files, and specializes it for
the KC type.  It adds a couple of straightforward tests to verify that the
approach works.  I've tested the code generated for these tests on a POWER9
simulator as well as a POWER8 machine.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
I've also asked the glibc team to verify that this serves their requirements.
Is this ok for trunk, and for gcc-6-branch after a short burn-in period?

Thanks,
Bill



[libgcc]

2016-07-11  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/_divkc3.c: New.
        * config/rs6000/_mulkc3.c: New.
        * config/rs6000/quad-float128.h: Define TCtype; declare __mulkc3
        and __divkc3.
        * config/rs6000/t-float128: Add _mulkc3 and _divkc3 to
        fp128_ppc_funcs.

[gcc/testsuite]

2016-07-11  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.target/powerpc/divkc3-1.c: New.
        * gcc.target/powerpc/mulkc3-1.c: New.


Index: gcc/testsuite/gcc.target/powerpc/divkc3-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/divkc3-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/divkc3-1.c (working copy)
@@ -0,0 +1,22 @@
+/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
+/* { dg-options "-mfloat128 -mvsx" } */
+
+void abort ();
+
+typedef __complex float __cfloat128 __attribute__((mode(KC)));
+
+__cfloat128 divide (__cfloat128 x, __cfloat128 y)  /* KC-mode quotient x / y.  */
+{
+  return x / y;
+}
+
+__cfloat128 z, a;
+
+int main ()
+{
+  z = divide (5.0q + 5.0jq, 2.0q + 1.0jq);  /* (5 + 5i) / (2 + i) */
+  a = 3.0q + 1.0jq;  /* Expected quotient, 3 + i.  */
+  if (z != a)  /* Abort unless the result matches exactly.  */
+    abort ();
+  return 0;
+}
Index: gcc/testsuite/gcc.target/powerpc/mulkc3-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/mulkc3-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/mulkc3-1.c (working copy)
@@ -0,0 +1,22 @@
+/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
+/* { dg-options "-mfloat128 -mvsx" } */
+
+void abort ();
+
+typedef __complex float __cfloat128 __attribute__((mode(KC)));
+
+__cfloat128 multiply (__cfloat128 x, __cfloat128 y)  /* KC-mode product x * y.  */
+{
+  return x * y;
+}
+
+__cfloat128 z, a;
+
+int main ()
+{
+  z = multiply (2.0q + 1.0jq, 3.0q + 1.0jq);  /* (2 + i) * (3 + i) */
+  a = 5.0q + 5.0jq;  /* Expected product, 5 + 5i.  */
+  if (z != a)  /* Abort unless the result matches exactly.  */
+    abort ();
+  return 0;
+}
Index: libgcc/config/rs6000/_divkc3.c
===================================================================
--- libgcc/config/rs6000/_divkc3.c      (revision 0)
+++ libgcc/config/rs6000/_divkc3.c      (working copy)
@@ -0,0 +1,64 @@
+typedef float KFtype __attribute__ ((mode (KF)));
+typedef __complex float KCtype __attribute__ ((mode (KC)));
+
+#define COPYSIGN(x,y) __builtin_copysignq (x, y)
+#define INFINITY __builtin_infq ()
+#define FABS __builtin_fabsq
+#define isnan __builtin_isnan
+#define isinf __builtin_isinf
+#define isfinite __builtin_isfinite
+
+KCtype
+__divkc3 (KFtype a, KFtype b, KFtype c, KFtype d)  /* (a + ib) / (c + id) */
+{
+  KFtype denom, ratio, x, y;  /* The quotient is built as x + iy.  */
+  KCtype res;
+
+  /* Divide through by whichever part of the divisor has the larger
+     magnitude.  ??? We can get better behavior from logarithmic scaling
+     instead of the division, but that would mean linking libgcc against
+     libm; something akin to ldexp/frexp as gcc builtins would be easy.  */
+  if (FABS (c) < FABS (d))
+    {
+      ratio = c / d;
+      denom = (c * ratio) + d;  /* Mathematically (c*c + d*d) / d.  */
+      x = ((a * ratio) + b) / denom;
+      y = ((b * ratio) - a) / denom;
+    }
+  else
+    {
+      ratio = d / c;
+      denom = (d * ratio) + c;  /* Mathematically (c*c + d*d) / c.  */
+      x = ((b * ratio) + a) / denom;
+      y = (b - (a * ratio)) / denom;
+    }
+
+  /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
+     are nonzero/zero, infinite/finite, and finite/infinite.  */
+  if (isnan (x) && isnan (y))
+    {
+      if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b)))
+       {
+         x = COPYSIGN (INFINITY, c) * a;  /* Zero divisor: scale by an  */
+         y = COPYSIGN (INFINITY, c) * b;  /* infinity signed like c.  */
+       }
+      else if ((isinf (a) || isinf (b)) && isfinite (c) && isfinite (d))
+       {
+         a = COPYSIGN (isinf (a) ? 1 : 0, a);  /* +-1 if infinite, else +-0.  */
+         b = COPYSIGN (isinf (b) ? 1 : 0, b);
+         x = INFINITY * (a * c + b * d);
+         y = INFINITY * (b * c - a * d);
+       }
+      else if ((isinf (c) || isinf (d)) && isfinite (a) && isfinite (b))
+       {
+         c = COPYSIGN (isinf (c) ? 1 : 0, c);  /* +-1 if infinite, else +-0.  */
+         d = COPYSIGN (isinf (d) ? 1 : 0, d);
+         x = 0.0 * (a * c + b * d);  /* Finite / infinite: signed zero.  */
+         y = 0.0 * (b * c - a * d);
+       }
+    }
+
+  __real__ res = x;
+  __imag__ res = y;
+  return res;
+}
Index: libgcc/config/rs6000/_mulkc3.c
===================================================================
--- libgcc/config/rs6000/_mulkc3.c      (revision 0)
+++ libgcc/config/rs6000/_mulkc3.c      (working copy)
@@ -0,0 +1,69 @@
+typedef float KFtype __attribute__ ((mode (KF)));
+typedef __complex float KCtype __attribute__ ((mode (KC)));
+
+#define COPYSIGN(x,y) __builtin_copysignq (x, y)
+#define INFINITY __builtin_infq ()
+#define isnan __builtin_isnan
+#define isinf __builtin_isinf
+
+KCtype
+__mulkc3 (KFtype a, KFtype b, KFtype c, KFtype d)  /* (a + ib) * (c + id) */
+{
+  KFtype ac, bd, ad, bc, x, y;  /* Partial products; result is x + iy.  */
+  KCtype res;
+
+  ac = a * c;
+  bd = b * d;
+  ad = a * d;
+  bc = b * c;
+
+  x = ac - bd;
+  y = ad + bc;
+
+  if (isnan (x) && isnan (y))
+    {
+      /* Recover infinities that computed as NaN + iNaN.  */
+      _Bool recalc = 0;
+      if (isinf (a) || isinf (b))
+       {
+         /* z = a + ib is infinite.  "Box" the infinity and change NaNs
+            in the other factor to 0.  */
+         a = COPYSIGN (isinf (a) ? 1 : 0, a);
+         b = COPYSIGN (isinf (b) ? 1 : 0, b);
+         if (isnan (c)) c = COPYSIGN (0, c);
+         if (isnan (d)) d = COPYSIGN (0, d);
+          recalc = 1;
+       }
+     if (isinf (c) || isinf (d))
+       {
+         /* w = c + id is infinite.  "Box" the infinity and change NaNs
+            in the other factor to 0.  */
+         c = COPYSIGN (isinf (c) ? 1 : 0, c);
+         d = COPYSIGN (isinf (d) ? 1 : 0, d);
+         if (isnan (a)) a = COPYSIGN (0, a);
+         if (isnan (b)) b = COPYSIGN (0, b);
+         recalc = 1;
+       }
+     if (!recalc
+         && (isinf (ac) || isinf (bd)
+             || isinf (ad) || isinf (bc)))
+       {
+         /* Recover infinities from overflow by changing NaNs to 0.  */
+         if (isnan (a)) a = COPYSIGN (0, a);
+         if (isnan (b)) b = COPYSIGN (0, b);
+         if (isnan (c)) c = COPYSIGN (0, c);
+         if (isnan (d)) d = COPYSIGN (0, d);
+         recalc = 1;
+       }
+      if (recalc)
+       {
+         x = INFINITY * (a * c - b * d);  /* Redo with the adjusted operands.  */
+         y = INFINITY * (a * d + b * c);
+       }
+    }
+
+  __real__ res = x;
+  __imag__ res = y;
+  return res;
+}
+
Index: libgcc/config/rs6000/quad-float128.h
===================================================================
--- libgcc/config/rs6000/quad-float128.h        (revision 238213)
+++ libgcc/config/rs6000/quad-float128.h        (working copy)
@@ -33,6 +33,10 @@
    This define forces it to use KFmode (aka, ieee 128-bit floating point).  */
 #define TF KF
 
+/* We also need TCtype to represent complex ieee 128-bit float for
+   __mulkc3 and __divkc3.  */
+typedef __complex float TCtype __attribute__ ((mode (KC)));
+
 /* Force the use of the VSX instruction set.  */
 #if defined(_ARCH_PPC) && (!defined(__VSX__) || !defined(__FLOAT128__))
 #pragma GCC target ("vsx,float128")
@@ -154,6 +158,10 @@ extern TFtype __floatundikf (UDItype_ppc);
 extern IBM128_TYPE __extendkftf2 (TFtype);
 extern TFtype __trunctfkf2 (IBM128_TYPE);
 
+/* Complex __float128 built on __float128 interfaces.  */
+extern TCtype __mulkc3 (TFtype, TFtype, TFtype, TFtype);
+extern TCtype __divkc3 (TFtype, TFtype, TFtype, TFtype);
+
 /* Implementation of conversions between __ibm128 and __float128, to allow the
    same code to be used on systems with IEEE 128-bit emulation and with IEEE
    128-bit hardware support.  */
Index: libgcc/config/rs6000/t-float128
===================================================================
--- libgcc/config/rs6000/t-float128     (revision 238213)
+++ libgcc/config/rs6000/t-float128     (working copy)
@@ -25,7 +25,7 @@ fp128_softfp_obj      = $(fp128_softfp_static_obj) $(fp
 # New functions for software emulation
 fp128_ppc_funcs                = floattikf floatuntikf fixkfti fixunskfti \
                          extendkftf2-sw trunctfkf2-sw \
-                         sfp-exceptions
+                         sfp-exceptions _mulkc3 _divkc3
 
 fp128_ppc_src          = $(addprefix $(srcdir)/config/rs6000/,$(addsuffix \
                                .c,$(fp128_ppc_funcs)))

[PATCH, rs6000, libgcc] Implement temporary solution for __divkc3 and __mulkc3

Reply via email to

[PATCH, rs6000, libgcc] Implement temporary solution for divkc3 and mulkc3