[PATCH], PR libgcc/83112, Add ifunc support for _mulkc3 and _divkc3

Michael Meissner Mon, 27 Nov 2017 14:38:15 -0800

Unlike the other _Float128 emulation support in the PowerPC libgcc, the support
for _Complex _Float128 multiply and divide doesn't resolve into a single
instruction on the power9 system.


But these two functions do benefit if they are compiled for ISA 3.0 _Float128
hardware instructions, by eliminating calling __{add,sub,mul,div}kf2 through
PLT functions to get to the hardware instruction, and instead using the native
instruction.

I have done bootstrap builds on a little endian power8 system with/without the
patches and there were no regressions in the testsuite.

I have also built the compiler on a little endian power9 prototype system, and
I ran a test that did 100,000,000 passes of complex multiplies and adds and then
100,000,000 passes of complex divides and subtracts.  The test with these fixes
was roughly 45% faster than the test with the unpatched compiler.  I also ran
the test on a power8 system, where it runs using the software emulation.

Can I check this patch into the trunk, assuming that the previously posted
patch for PR libgcc/83112 has also been applied?

2017-11-27  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR libgcc/83103
        * config/rs6000/quad-float128.h (TF): Don't define if long double
        is IEEE 128-bit floating point.
        (TCtype): Define as either TCmode or KCmode, depending on whether
        long double is IEEE 128-bit floating point.
        (__mulkc3_sw): Add declarations for software/hardware versions of
        complex multiply/divide.
        (__divkc3_sw): Likewise.
        (__mulkc3_hw): Likewise.
        (__divkc3_hw): Likewise.
        * config/rs6000/_mulkc3.c (_mulkc3): If we are building ifunc
        handlers to switch between using software emulation and hardware
        float128 instructions, build the complex multiply/divide functions
        for both software and hardware support.
        * config/rs6000/_divkc3.c (_divkc3): Likewise.
        * config/rs6000/float128-ifunc.c (__mulkc3_resolve): Likewise.
        (__divkc3_resolve): Likewise.
        (__mulkc3): Likewise.
        (__divkc3): Likewise.
        * config/rs6000/t-float128-hw (fp128_hardfp_src): Likewise.
        (fp128_hw_src): Likewise.
        (fp128_hw_static_obj): Likewise.
        (fp128_hw_shared_obj): Likewise.
        (_mulkc3-hw.c): Likewise.
        (_divkc3-hw.c): Likewise.
        * config/rs6000/t-float128 (clean-float128): Add deleting
        _mulkc3-hw.c and _divkc3-hw.c.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

Index: libgcc/config/rs6000/quad-float128.h
===================================================================
--- libgcc/config/rs6000/quad-float128.h        (revision 255177)
+++ libgcc/config/rs6000/quad-float128.h        (working copy)
@@ -30,13 +30,20 @@
 /* quad.h defines the TFtype type by:
    typedef float TFtype __attribute__ ((mode (TF)));
 
-   This define forces it to use KFmode (aka, ieee 128-bit floating point).  */
+   This define forces it to use KFmode (aka, ieee 128-bit floating point).
+   However, when the compiler's default is changed so that long double is IEEE
+   128-bit floating point, we need to go back to using TFmode and TCmode.  */
+#ifndef __LONG_DOUBLE_IEEE128__
 #define TF KF
 
 /* We also need TCtype to represent complex ieee 128-bit float for
    __mulkc3 and __divkc3.  */
 typedef __complex float TCtype __attribute__ ((mode (KC)));
 
+#else
+typedef __complex float TCtype __attribute__ ((mode (TC)));
+#endif
+
 /* Force the use of the VSX instruction set.  */
 #if defined(_ARCH_PPC) && (!defined(__VSX__) || !defined(__FLOAT128__))
 #pragma GCC target ("vsx,float128")
@@ -88,6 +95,8 @@ extern TFtype __floatunsikf_sw (USItype_
 extern TFtype __floatundikf_sw (UDItype_ppc);
 extern IBM128_TYPE __extendkftf2_sw (TFtype);
 extern TFtype __trunctfkf2_sw (IBM128_TYPE);
+extern TCtype __mulkc3_sw (TFtype, TFtype, TFtype, TFtype);
+extern TCtype __divkc3_sw (TFtype, TFtype, TFtype, TFtype);
 
 #ifdef _ARCH_PPC64
 /* We do not provide ifunc resolvers for __fixkfti, __fixunskfti, __floattikf,
@@ -128,6 +137,8 @@ extern TFtype __floatunsikf_hw (USItype_
 extern TFtype __floatundikf_hw (UDItype_ppc);
 extern IBM128_TYPE __extendkftf2_hw (TFtype);
 extern TFtype __trunctfkf2_hw (IBM128_TYPE);
+extern TCtype __mulkc3_hw (TFtype, TFtype, TFtype, TFtype);
+extern TCtype __divkc3_hw (TFtype, TFtype, TFtype, TFtype);
 
 /* Ifunc function declarations, to automatically switch between software
    emulation and hardware support.  */
Index: libgcc/config/rs6000/_mulkc3.c
===================================================================
--- libgcc/config/rs6000/_mulkc3.c      (revision 255177)
+++ libgcc/config/rs6000/_mulkc3.c      (working copy)
@@ -31,6 +31,10 @@ typedef __complex float KCtype __attribu
 #define isnan __builtin_isnan
 #define isinf __builtin_isinf
 
+#if defined(FLOAT128_HW_INSNS) && !defined(__mulkc3)
+#define __mulkc3 __mulkc3_sw
+#endif
+
 KCtype
 __mulkc3 (KFtype a, KFtype b, KFtype c, KFtype d)
 {
Index: libgcc/config/rs6000/_divkc3.c
===================================================================
--- libgcc/config/rs6000/_divkc3.c      (revision 255177)
+++ libgcc/config/rs6000/_divkc3.c      (working copy)
@@ -33,6 +33,10 @@ typedef __complex float KCtype __attribu
 #define isinf __builtin_isinf
 #define isfinite __builtin_isfinite
 
+#if defined(FLOAT128_HW_INSNS) && !defined(__divkc3)
+#define __divkc3 __divkc3_sw
+#endif
+
 KCtype
 __divkc3 (KFtype a, KFtype b, KFtype c, KFtype d)
 {
Index: libgcc/config/rs6000/float128-ifunc.c
===================================================================
--- libgcc/config/rs6000/float128-ifunc.c       (revision 255177)
+++ libgcc/config/rs6000/float128-ifunc.c       (working copy)
@@ -71,6 +71,8 @@ typedef TFtype (f128_func_usi_t)(USItype
 typedef TFtype (f128_func_udi_t)(UDItype_ppc);
 typedef IBM128_TYPE (ibm_func_f128_t)(TFtype);
 typedef TFtype (f128_func_ibm_t)(IBM128_TYPE);
+typedef TCtype (cf128_func_f128_f128_f128_f128_t) (TFtype, TFtype, TFtype,
+                                                  TFtype);
 
 static f128_func_f128_f128_t *__addkf3_resolve (void);
 static f128_func_f128_f128_t *__subkf3_resolve (void);
@@ -98,6 +100,8 @@ static f128_func_usi_t *__floatunsikf_re
 static f128_func_udi_t *__floatundikf_resolve (void);
 static ibm_func_f128_t *__extendkftf2_resolve (void);
 static f128_func_ibm_t *__trunctfkf2_resolve (void);
+static cf128_func_f128_f128_f128_f128_t *__mulkc3_resolve (void);
+static cf128_func_f128_f128_f128_f128_t *__divkc3_resolve (void);
 
 static f128_func_f128_f128_t *
 __addkf3_resolve (void)
@@ -210,7 +214,19 @@ __extendkftf2_resolve (void)
 static f128_func_ibm_t *
 __trunctfkf2_resolve (void)
 {
-  return (void *) SW_OR_HW (__trunctfkf2_sw, __trunctfkf2_hw);
+  return SW_OR_HW (__trunctfkf2_sw, __trunctfkf2_hw);
+}
+
+static cf128_func_f128_f128_f128_f128_t *
+__mulkc3_resolve (void)
+{
+  return SW_OR_HW (__mulkc3_sw, __mulkc3_hw);
+}
+
+static cf128_func_f128_f128_f128_f128_t *
+__divkc3_resolve (void)
+{
+  return SW_OR_HW (__divkc3_sw, __divkc3_hw);
 }
 
 static cmp_func_f128_f128_t *
@@ -338,3 +354,9 @@ IBM128_TYPE __extendkftf2 (TFtype)
 
 TFtype __trunctfkf2 (IBM128_TYPE)
   __attribute__ ((__ifunc__ ("__trunctfkf2_resolve")));
+
+TCtype __mulkc3 (TFtype, TFtype, TFtype, TFtype)
+  __attribute__ ((__ifunc__ ("__mulkc3_resolve")));
+
+TCtype __divkc3 (TFtype, TFtype, TFtype, TFtype)
+  __attribute__ ((__ifunc__ ("__divkc3_resolve")));
Index: libgcc/config/rs6000/t-float128-hw
===================================================================
--- libgcc/config/rs6000/t-float128-hw  (revision 255177)
+++ libgcc/config/rs6000/t-float128-hw  (working copy)
@@ -5,10 +5,12 @@
 FLOAT128_HW_INSNS      = -DFLOAT128_HW_INSNS
 
 # New functions for hardware support
-fp128_hw_funcs         = float128-hw
-fp128_hw_src           = $(srcdir)/config/rs6000/float128-hw.c
-fp128_hw_static_obj    = float128-hw$(objext)
-fp128_hw_shared_obj    = float128-hw_s$(objext)
+fp128_hardfp_src       = _mulkc3-hw.c _divkc3-hw.c
+fp128_hw_funcs         = float128-hw _mulkc3-hw _divkc3-hw
+fp128_hw_src           = $(srcdir)/config/rs6000/float128-hw.c _mulkc3-hw.c \
+                         _divkc3-hw.c
+fp128_hw_static_obj    = $(addsuffix $(objext),$(fp128_hw_funcs))
+fp128_hw_shared_obj    = $(addsuffix _s$(objext),$(fp128_hw_funcs))
 fp128_hw_obj           = $(fp128_hw_static_obj) $(fp128_hw_shared_obj)
 
 fp128_ifunc_funcs      = float128-ifunc
@@ -33,3 +35,17 @@ $(fp128_hw_obj)               : $(srcdir)/config/rs6
 
 $(fp128_ifunc_obj)      : INTERNAL_CFLAGS += $(FP128_CFLAGS_SW)
 $(fp128_ifunc_obj)      : $(srcdir)/config/rs6000/t-float128-hw
+
+# Generate the hardware variant of _mulkc3.c: the same source, preceded by a
+# define renaming __mulkc3 to __mulkc3_hw.  Remove any stale generated copy.
+_mulkc3-hw.c: $(srcdir)/config/rs6000/_mulkc3.c
+       rm -rf $@
+       (echo "#define __mulkc3 __mulkc3_hw"; \
+        cat $(srcdir)/config/rs6000/_mulkc3.c) > $@
+
+# Likewise for _divkc3.c, with __divkc3 renamed to __divkc3_hw.  Remove any
+# stale generated copy before regenerating it.
+_divkc3-hw.c: $(srcdir)/config/rs6000/_divkc3.c
+       rm -rf $@
+       (echo "#define __divkc3 __divkc3_hw"; \
+        cat $(srcdir)/config/rs6000/_divkc3.c) > $@
Index: libgcc/config/rs6000/t-float128
===================================================================
--- libgcc/config/rs6000/t-float128     (revision 255177)
+++ libgcc/config/rs6000/t-float128     (working copy)
@@ -86,7 +86,7 @@ test:
        for x in $(fp128_obj); do echo "    $$x"; done;
 
 clean-float128:
-       rm -rf $(fp128_softfp_src)
+       rm -rf $(fp128_softfp_src) $(fp128_hardfp_src)
        @$(MULTICLEAN) multi-clean DO=clean-float128
 
 # For now, only put it in the static library

[PATCH], PR libgcc/83112, Add ifunc support for _mulkc3 and _divkc3

Reply via email to