On UCRT, we pass calls to sin()/cos() to the CRT; the UCRT versions of these functions are faster than our x87 implementations.
The same also goes for sincos(); calling the UCRT sin() and cos() separately is almost 3x as fast as calling the x87 sincos() implementation. These functions are implemented in assembly, because a plain C version can be optimized by the compiler back into a plain call to sincos() (GCC does this by default, Clang does it if compiling with -ffast-math); see 3f40dd3254582722761606c7c99d658f952002d9 for earlier precedent for arm/aarch64. --- Updated the code to exactly match the output from Clang on x86_64 (the previous version had manual touch-ups to some SSE moves). Now the x86_64 routines are exactly the output of Clang on x86_64, and the i386 routines are exactly the output of GCC on i386. (Clang defaults to using SSE2 on i386 targets.) Ideally, we wouldn't need to use assembly for things like this; ideally we should set -fno-builtin for the relevant source files. However, with automake, it is not easy to set such an option specifically for one individual file, and it is not possible to activate specifically -fno-builtin with either a pragma or an optimize attribute in the source files. 
--- mingw-w64-crt/Makefile.am | 9 ++++-- mingw-w64-crt/math/x86/sincos.S | 47 ++++++++++++++++++++++++++++++++ mingw-w64-crt/math/x86/sincosf.S | 46 +++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 mingw-w64-crt/math/x86/sincos.S create mode 100644 mingw-w64-crt/math/x86/sincosf.S diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am index 232aa7c0f..39fbdcbbc 100644 --- a/mingw-w64-crt/Makefile.am +++ b/mingw-w64-crt/Makefile.am @@ -215,6 +215,7 @@ src_msvcrt_common_add_x86=\ math/x86/atanh.c math/x86/atanhf.c \ math/x86/ceil.S \ math/x86/cos.c \ + math/x86/cossin.c \ math/x86/exp.c \ math/x86/exp2.S math/x86/exp2f.S \ math/x86/expm1.c math/x86/expm1f.c \ @@ -460,6 +461,8 @@ src_ucrtbase32=\ math/x86/cosf.c \ math/x86/floorf.S \ math/x86/fmodf.c \ + math/x86/sincos.S \ + math/x86/sincosf.S \ math/x86/sinf.c \ math/x86/tanf.c @@ -468,7 +471,9 @@ src_ucrtbase64=\ $(src_ucrtbase) \ math/fabsf.c \ math/nextafterl.c \ - math/nexttoward.c math/nexttowardf.c + math/nexttoward.c math/nexttowardf.c \ + math/x86/sincos.S \ + math/x86/sincosf.S # Files included in libucrt*.a on arm32 src_ucrtbasearm32=\ @@ -1010,7 +1015,7 @@ src_libmingwex_x86=\ math/x86/atanhl.c math/x86/atanl.c \ math/x86/ceill.S math/x86/copysignl.S \ math/x86/cos.def.h math/x86/cosl.c math/x86/cosl_internal.S \ - math/x86/cossin.c math/x86/cossinl.c \ + math/x86/cossinl.c \ math/x86/exp2l.S math/x86/exp.def.h math/x86/expl.c math/x86/expm1.def.h math/x86/expm1l.c \ math/x86/fastmath.h math/x86/floorl.S \ math/x86/fmodl.c math/x86/fucom.c \ diff --git a/mingw-w64-crt/math/x86/sincos.S b/mingw-w64-crt/math/x86/sincos.S new file mode 100644 index 000000000..410e13edb --- /dev/null +++ b/mingw-w64-crt/math/x86/sincos.S @@ -0,0 +1,47 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. 
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +#include <_mingw_mac.h> + + .file "sincos.S" + .text + .p2align 4 + .globl __MINGW_USYMBOL(sincos) + .def __MINGW_USYMBOL(sincos); .scl 2; .type 32; .endef +__MINGW_USYMBOL(sincos): +#ifdef __x86_64__ + pushq %rsi + pushq %rdi + subq $56, %rsp + movaps %xmm6, 32(%rsp) + movq %r8, %rsi + movq %rdx, %rdi + movaps %xmm0, %xmm6 + call sin + movsd %xmm0, (%rdi) + movaps %xmm6, %xmm0 + call cos + movsd %xmm0, (%rsi) + movaps 32(%rsp), %xmm6 + addq $56, %rsp + popq %rdi + popq %rsi + retq +#else + subl $44, %esp + fldl 48(%esp) + fstl (%esp) + fstpl 24(%esp) + call _sin + movl 56(%esp), %eax + fstpl (%eax) + fldl 24(%esp) + fstpl (%esp) + call _cos + movl 60(%esp), %eax + fstpl (%eax) + addl $44, %esp + ret +#endif diff --git a/mingw-w64-crt/math/x86/sincosf.S b/mingw-w64-crt/math/x86/sincosf.S new file mode 100644 index 000000000..19080b13d --- /dev/null +++ b/mingw-w64-crt/math/x86/sincosf.S @@ -0,0 +1,46 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. 
+ */ +#include <_mingw_mac.h> + + .file "sincosf.S" + .text + .p2align 4 + .globl __MINGW_USYMBOL(sincosf) + .def __MINGW_USYMBOL(sincosf); .scl 2; .type 32; .endef +__MINGW_USYMBOL(sincosf): +#ifdef __x86_64__ + pushq %rsi + pushq %rdi + subq $56, %rsp + movaps %xmm6, 32(%rsp) + movq %r8, %rsi + movq %rdx, %rdi + movaps %xmm0, %xmm6 + call sinf + movss %xmm0, (%rdi) + movaps %xmm6, %xmm0 + call cosf + movss %xmm0, (%rsi) + movaps 32(%rsp), %xmm6 + addq $56, %rsp + popq %rdi + popq %rsi + retq +#else + subl $28, %esp + flds 32(%esp) + fstps (%esp) + call _sinf + movl 36(%esp), %eax + fstps (%eax) + flds 32(%esp) + fstps (%esp) + call _cosf + movl 40(%esp), %eax + fstps (%eax) + addl $28, %esp + ret +#endif -- 2.43.0 _______________________________________________ Mingw-w64-public mailing list Mingw-w64-public@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mingw-w64-public