Unlike the other _Float128 emulation support in the PowerPC libgcc, the support for _Complex _Float128 multiply and divide doesn't resolve into a single instruction on the power9 system.
But these two functions do benefit if they are compiled for ISA 3.0 _Float128 hardware instructions, by eliminating calling __{add,sub,mul,div}kf3 through PLT functions to get to the hardware instruction, and instead using the native instruction. I have done bootstrap builds on a little endian power8 system with/without the patches and there were no regressions in the testsuite. I have also built the compiler on a little endian power9 prototype system, and I ran a test that did 100,000,000 passes of complex multiplies and adds and then 100,000,000 passes of complex divides and subtracts. The test with these fixes was roughly 45% faster than the test with the unpatched compiler. I also ran the test on a power8 system, and it ran using the software emulation. Can I check this patch into the trunk, assuming that the previously posted patch for PR libgcc/83112 has also been applied? 2017-11-27 Michael Meissner <meiss...@linux.vnet.ibm.com> PR libgcc/83103 * config/rs6000/quad-float128.h (TF): Don't define if long double is IEEE 128-bit floating point. (TCtype): Define as either TCmode or KCmode, depending on whether long double is IEEE 128-bit floating point. (__mulkc3_sw): Add declarations for software/hardware versions of complex multiply/divide. (__divkc3_sw): Likewise. (__mulkc3_hw): Likewise. (__divkc3_hw): Likewise. * config/rs6000/_mulkc3.c (_mulkc3): If we are building ifunc handlers to switch between using software emulation and hardware float128 instructions, build the complex multiply/divide functions for both software and hardware support. * config/rs6000/_divkc3.c (_divkc3): Likewise. * config/rs6000/float128-ifunc.c (__mulkc3_resolve): Likewise. (__divkc3_resolve): Likewise. (__mulkc3): Likewise. (__divkc3): Likewise. * config/rs6000/t-float128-hw (fp128_hardfp_src): Likewise. (fp128_hw_src): Likewise. (fp128_hw_static_obj): Likewise. (fp128_hw_shared_obj): Likewise. (_mulkc3-hw.c): Likewise. (_divkc3-hw.c): Likewise. 
* config/rs6000/t-float128 (clean-float128): Add deleting _mulkc3-hw.c and _divkc3-hw.c. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: libgcc/config/rs6000/quad-float128.h =================================================================== --- libgcc/config/rs6000/quad-float128.h (revision 255177) +++ libgcc/config/rs6000/quad-float128.h (working copy) @@ -30,13 +30,20 @@ /* quad.h defines the TFtype type by: typedef float TFtype __attribute__ ((mode (TF))); - This define forces it to use KFmode (aka, ieee 128-bit floating point). */ + This define forces it to use KFmode (aka, ieee 128-bit floating point). + However, when the compiler's default is changed so that long double is IEEE + 128-bit floating point, we need to go back to using TFmode and TCmode. */ +#ifndef __LONG_DOUBLE_IEEE128__ #define TF KF /* We also need TCtype to represent complex ieee 128-bit float for __mulkc3 and __divkc3. */ typedef __complex float TCtype __attribute__ ((mode (KC))); +#else +typedef __complex float TCtype __attribute__ ((mode (TC))); +#endif + /* Force the use of the VSX instruction set. */ #if defined(_ARCH_PPC) && (!defined(__VSX__) || !defined(__FLOAT128__)) #pragma GCC target ("vsx,float128") @@ -88,6 +95,8 @@ extern TFtype __floatunsikf_sw (USItype_ extern TFtype __floatundikf_sw (UDItype_ppc); extern IBM128_TYPE __extendkftf2_sw (TFtype); extern TFtype __trunctfkf2_sw (IBM128_TYPE); +extern TCtype __mulkc3_sw (TFtype, TFtype, TFtype, TFtype); +extern TCtype __divkc3_sw (TFtype, TFtype, TFtype, TFtype); #ifdef _ARCH_PPC64 /* We do not provide ifunc resolvers for __fixkfti, __fixunskfti, __floattikf, @@ -128,6 +137,8 @@ extern TFtype __floatunsikf_hw (USItype_ extern TFtype __floatundikf_hw (UDItype_ppc); extern IBM128_TYPE __extendkftf2_hw (TFtype); extern TFtype __trunctfkf2_hw (IBM128_TYPE); +extern TCtype __mulkc3_hw (TFtype, TFtype, TFtype, TFtype); +extern TCtype __divkc3_hw (TFtype, TFtype, TFtype, TFtype); /* Ifunc function declarations, to automatically switch between software emulation and hardware support. 
*/ Index: libgcc/config/rs6000/_mulkc3.c =================================================================== --- libgcc/config/rs6000/_mulkc3.c (revision 255177) +++ libgcc/config/rs6000/_mulkc3.c (working copy) @@ -31,6 +31,10 @@ typedef __complex float KCtype __attribu #define isnan __builtin_isnan #define isinf __builtin_isinf +#if defined(FLOAT128_HW_INSNS) && !defined(__mulkc3) +#define __mulkc3 __mulkc3_sw +#endif + KCtype __mulkc3 (KFtype a, KFtype b, KFtype c, KFtype d) { Index: libgcc/config/rs6000/_divkc3.c =================================================================== --- libgcc/config/rs6000/_divkc3.c (revision 255177) +++ libgcc/config/rs6000/_divkc3.c (working copy) @@ -33,6 +33,10 @@ typedef __complex float KCtype __attribu #define isinf __builtin_isinf #define isfinite __builtin_isfinite +#if defined(FLOAT128_HW_INSNS) && !defined(__divkc3) +#define __divkc3 __divkc3_sw +#endif + KCtype __divkc3 (KFtype a, KFtype b, KFtype c, KFtype d) { Index: libgcc/config/rs6000/float128-ifunc.c =================================================================== --- libgcc/config/rs6000/float128-ifunc.c (revision 255177) +++ libgcc/config/rs6000/float128-ifunc.c (working copy) @@ -71,6 +71,8 @@ typedef TFtype (f128_func_usi_t)(USItype typedef TFtype (f128_func_udi_t)(UDItype_ppc); typedef IBM128_TYPE (ibm_func_f128_t)(TFtype); typedef TFtype (f128_func_ibm_t)(IBM128_TYPE); +typedef TCtype (cf128_func_f128_f128_f128_f128_t) (TFtype, TFtype, TFtype, + TFtype); static f128_func_f128_f128_t *__addkf3_resolve (void); static f128_func_f128_f128_t *__subkf3_resolve (void); @@ -98,6 +100,8 @@ static f128_func_usi_t *__floatunsikf_re static f128_func_udi_t *__floatundikf_resolve (void); static ibm_func_f128_t *__extendkftf2_resolve (void); static f128_func_ibm_t *__trunctfkf2_resolve (void); +static cf128_func_f128_f128_f128_f128_t *__mulkc3_resolve (void); +static cf128_func_f128_f128_f128_f128_t *__divkc3_resolve (void); static f128_func_f128_f128_t * 
__addkf3_resolve (void) @@ -210,7 +214,19 @@ __extendkftf2_resolve (void) static f128_func_ibm_t * __trunctfkf2_resolve (void) { - return (void *) SW_OR_HW (__trunctfkf2_sw, __trunctfkf2_hw); + return SW_OR_HW (__trunctfkf2_sw, __trunctfkf2_hw); +} + +static cf128_func_f128_f128_f128_f128_t * +__mulkc3_resolve (void) +{ + return SW_OR_HW (__mulkc3_sw, __mulkc3_hw); +} + +static cf128_func_f128_f128_f128_f128_t * +__divkc3_resolve (void) +{ + return SW_OR_HW (__divkc3_sw, __divkc3_hw); } static cmp_func_f128_f128_t * @@ -338,3 +354,9 @@ IBM128_TYPE __extendkftf2 (TFtype) TFtype __trunctfkf2 (IBM128_TYPE) __attribute__ ((__ifunc__ ("__trunctfkf2_resolve"))); + +TCtype __mulkc3 (TFtype, TFtype, TFtype, TFtype) + __attribute__ ((__ifunc__ ("__mulkc3_resolve"))); + +TCtype __divkc3 (TFtype, TFtype, TFtype, TFtype) + __attribute__ ((__ifunc__ ("__divkc3_resolve"))); Index: libgcc/config/rs6000/t-float128-hw =================================================================== --- libgcc/config/rs6000/t-float128-hw (revision 255177) +++ libgcc/config/rs6000/t-float128-hw (working copy) @@ -5,10 +5,12 @@ FLOAT128_HW_INSNS = -DFLOAT128_HW_INSNS # New functions for hardware support -fp128_hw_funcs = float128-hw -fp128_hw_src = $(srcdir)/config/rs6000/float128-hw.c -fp128_hw_static_obj = float128-hw$(objext) -fp128_hw_shared_obj = float128-hw_s$(objext) +fp128_hardfp_src = _mulkc3-hw.c _divkc3-hw.c +fp128_hw_funcs = float128-hw _mulkc3-hw _divkc3-hw +fp128_hw_src = $(srcdir)/config/rs6000/float128-hw.c _mulkc3-hw.c \ + _divkc3-hw.c +fp128_hw_static_obj = $(addsuffix $(objext),$(fp128_hw_funcs)) +fp128_hw_shared_obj = $(addsuffix _s$(objext),$(fp128_hw_funcs)) fp128_hw_obj = $(fp128_hw_static_obj) $(fp128_hw_shared_obj) fp128_ifunc_funcs = float128-ifunc @@ -33,3 +35,13 @@ $(fp128_hw_obj) : $(srcdir)/config/rs6 $(fp128_ifunc_obj) : INTERNAL_CFLAGS += $(FP128_CFLAGS_SW) $(fp128_ifunc_obj) : $(srcdir)/config/rs6000/t-float128-hw + +_mulkc3-hw.c: $(srcdir)/config/rs6000/_mulkc3.c + 
rm -rf _mulkc3.c + (echo "#define __mulkc3 __mulkc3_hw"; \ + cat $(srcdir)/config/rs6000/_mulkc3.c) > _mulkc3-hw.c + +_divkc3-hw.c: $(srcdir)/config/rs6000/_divkc3.c + rm -rf _divkc3.c + (echo "#define __divkc3 __divkc3_hw"; \ + cat $(srcdir)/config/rs6000/_divkc3.c) > _divkc3-hw.c Index: libgcc/config/rs6000/t-float128 =================================================================== --- libgcc/config/rs6000/t-float128 (revision 255177) +++ libgcc/config/rs6000/t-float128 (working copy) @@ -86,7 +86,7 @@ test: for x in $(fp128_obj); do echo " $$x"; done; clean-float128: - rm -rf $(fp128_softfp_src) + rm -rf $(fp128_softfp_src) $(fp128_hardfp_src) @$(MULTICLEAN) multi-clean DO=clean-float128 # For now, only put it in the static library