On UCRT, we pass calls to sin()/cos() to the CRT; the UCRT versions of these functions are faster than our x87 implementations.
The same also goes for sincos(); calling the UCRT sin() and cos() separately is almost 3x as fast as calling the x87 sincos() implementation. These functions are implemented in assembly, because a plain C version can be optimized by the compiler back into a plain call to sincos() (GCC does this by default, Clang does it if compiling with -ffast-math), which would make the function call itself; see 3f40dd3254582722761606c7c99d658f952002d9 for earlier precedent for arm/aarch64. --- mingw-w64-crt/Makefile.am | 9 ++++-- mingw-w64-crt/math/x86/sincos.S | 47 ++++++++++++++++++++++++++++++++ mingw-w64-crt/math/x86/sincosf.S | 46 +++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 mingw-w64-crt/math/x86/sincos.S create mode 100644 mingw-w64-crt/math/x86/sincosf.S diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am index 232aa7c0f..39fbdcbbc 100644 --- a/mingw-w64-crt/Makefile.am +++ b/mingw-w64-crt/Makefile.am @@ -215,6 +215,7 @@ src_msvcrt_common_add_x86=\ math/x86/atanh.c math/x86/atanhf.c \ math/x86/ceil.S \ math/x86/cos.c \ + math/x86/cossin.c \ math/x86/exp.c \ math/x86/exp2.S math/x86/exp2f.S \ math/x86/expm1.c math/x86/expm1f.c \ @@ -460,6 +461,8 @@ src_ucrtbase32=\ math/x86/cosf.c \ math/x86/floorf.S \ math/x86/fmodf.c \ + math/x86/sincos.S \ + math/x86/sincosf.S \ math/x86/sinf.c \ math/x86/tanf.c @@ -468,7 +471,9 @@ src_ucrtbase64=\ $(src_ucrtbase) \ math/fabsf.c \ math/nextafterl.c \ - math/nexttoward.c math/nexttowardf.c + math/nexttoward.c math/nexttowardf.c \ + math/x86/sincos.S \ + math/x86/sincosf.S # Files included in libucrt*.a on arm32 src_ucrtbasearm32=\ @@ -1010,7 +1015,7 @@ src_libmingwex_x86=\ math/x86/atanhl.c math/x86/atanl.c \ math/x86/ceill.S math/x86/copysignl.S \ math/x86/cos.def.h math/x86/cosl.c math/x86/cosl_internal.S \ - math/x86/cossin.c math/x86/cossinl.c \ + math/x86/cossinl.c \ math/x86/exp2l.S math/x86/exp.def.h math/x86/expl.c math/x86/expm1.def.h math/x86/expm1l.c \ math/x86/fastmath.h math/x86/floorl.S \ 
math/x86/fmodl.c math/x86/fucom.c \ diff --git a/mingw-w64-crt/math/x86/sincos.S b/mingw-w64-crt/math/x86/sincos.S new file mode 100644 index 000000000..f0846983a --- /dev/null +++ b/mingw-w64-crt/math/x86/sincos.S @@ -0,0 +1,47 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +#include <_mingw_mac.h> + + .file "sincos.S" + .text + .p2align 4 + .globl __MINGW_USYMBOL(sincos) + .def __MINGW_USYMBOL(sincos); .scl 2; .type 32; .endef +__MINGW_USYMBOL(sincos): +#ifdef __x86_64__ + pushq %rsi + pushq %rdi + subq $56, %rsp + movups %xmm6, 32(%rsp) + movq %r8, %rsi + movq %rdx, %rdi + movupd %xmm0, %xmm6 + call sin + movsd %xmm0, (%rdi) + movupd %xmm6, %xmm0 + call cos + movsd %xmm0, (%rsi) + movups 32(%rsp), %xmm6 + addq $56, %rsp + popq %rdi + popq %rsi + retq +#else + subl $44, %esp + fldl 48(%esp) + fstl (%esp) + fstpl 24(%esp) + call _sin + movl 56(%esp), %eax + fstpl (%eax) + fldl 24(%esp) + fstpl (%esp) + call _cos + movl 60(%esp), %eax + fstpl (%eax) + addl $44, %esp + ret +#endif diff --git a/mingw-w64-crt/math/x86/sincosf.S b/mingw-w64-crt/math/x86/sincosf.S new file mode 100644 index 000000000..21ee30eef --- /dev/null +++ b/mingw-w64-crt/math/x86/sincosf.S @@ -0,0 +1,46 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. 
+ */ +#include <_mingw_mac.h> + + .file "sincosf.S" + .text + .p2align 4 + .globl __MINGW_USYMBOL(sincosf) + .def __MINGW_USYMBOL(sincosf); .scl 2; .type 32; .endef +__MINGW_USYMBOL(sincosf): +#ifdef __x86_64__ + pushq %rsi + pushq %rdi + subq $56, %rsp + movups %xmm6, 32(%rsp) + movq %r8, %rsi + movq %rdx, %rdi + movups %xmm0, %xmm6 + call sinf + movss %xmm0, (%rdi) + movaps %xmm6, %xmm0 + call cosf + movss %xmm0, (%rsi) + movaps 32(%rsp), %xmm6 + addq $56, %rsp + popq %rdi + popq %rsi + retq +#else + subl $28, %esp + flds 32(%esp) + fstps (%esp) + call _sinf + movl 36(%esp), %eax + fstps (%eax) + flds 32(%esp) + fstps (%esp) + call _cosf + movl 40(%esp), %eax + fstps (%eax) + addl $28, %esp + ret +#endif -- 2.43.0 _______________________________________________ Mingw-w64-public mailing list Mingw-w64-public@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mingw-w64-public