On UCRT, we pass calls to sin()/cos() to the CRT; the UCRT
versions of these functions are faster than our x87 implementations.

The same also goes for sincos(); calling the UCRT sin() and cos()
separately is almost 3x as fast as calling the x87 sincos()
implementation.

Implement these functions in assembly; a plain C version can be
optimized by the compiler back into a plain call to sincos(),
i.e. the function would end up calling itself (GCC does this by
default, Clang does it if compiling with -ffast-math). See
3f40dd3254582722761606c7c99d658f952002d9 for earlier precedent
for arm/aarch64.
---
 mingw-w64-crt/Makefile.am        |  9 ++++--
 mingw-w64-crt/math/x86/sincos.S  | 47 ++++++++++++++++++++++++++++++++
 mingw-w64-crt/math/x86/sincosf.S | 46 +++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 mingw-w64-crt/math/x86/sincos.S
 create mode 100644 mingw-w64-crt/math/x86/sincosf.S

diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index 232aa7c0f..39fbdcbbc 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -215,6 +215,7 @@ src_msvcrt_common_add_x86=\
   math/x86/atanh.c math/x86/atanhf.c \
   math/x86/ceil.S \
   math/x86/cos.c \
+  math/x86/cossin.c \
   math/x86/exp.c \
   math/x86/exp2.S math/x86/exp2f.S \
   math/x86/expm1.c math/x86/expm1f.c \
@@ -460,6 +461,8 @@ src_ucrtbase32=\
   math/x86/cosf.c \
   math/x86/floorf.S \
   math/x86/fmodf.c \
+  math/x86/sincos.S \
+  math/x86/sincosf.S \
   math/x86/sinf.c \
   math/x86/tanf.c
 
@@ -468,7 +471,9 @@ src_ucrtbase64=\
   $(src_ucrtbase) \
   math/fabsf.c \
   math/nextafterl.c \
-  math/nexttoward.c math/nexttowardf.c
+  math/nexttoward.c math/nexttowardf.c \
+  math/x86/sincos.S \
+  math/x86/sincosf.S
 
 # Files included in libucrt*.a on arm32
 src_ucrtbasearm32=\
@@ -1010,7 +1015,7 @@ src_libmingwex_x86=\
   math/x86/atanhl.c         math/x86/atanl.c          \
   math/x86/ceill.S          math/x86/copysignl.S      \
   math/x86/cos.def.h        math/x86/cosl.c           math/x86/cosl_internal.S  \
-  math/x86/cossin.c         math/x86/cossinl.c        \
+  math/x86/cossinl.c        \
   math/x86/exp2l.S          math/x86/exp.def.h        math/x86/expl.c           math/x86/expm1.def.h      math/x86/expm1l.c         \
   math/x86/fastmath.h       math/x86/floorl.S         \
   math/x86/fmodl.c          math/x86/fucom.c          \
diff --git a/mingw-w64-crt/math/x86/sincos.S b/mingw-w64-crt/math/x86/sincos.S
new file mode 100644
index 000000000..f0846983a
--- /dev/null
+++ b/mingw-w64-crt/math/x86/sincos.S
@@ -0,0 +1,62 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#include <_mingw_mac.h>
+
+/*
+ * sincos(x, s, c): store sin(x) to *s and cos(x) to *c by calling the CRT's
+ * sin() and cos() one after the other.
+ *
+ * x86_64 (Windows x64 ABI): x in %xmm0, s in %rdx, c in %r8.
+ * i386 (cdecl): x at 4(%esp), s at 12(%esp), c at 16(%esp) on entry;
+ * sin()/cos() hand back their result in st(0).
+ */
+       .file   "sincos.S"
+       .text
+       .p2align 4
+       .globl __MINGW_USYMBOL(sincos)
+       .def    __MINGW_USYMBOL(sincos);        .scl    2;      .type   32;     .endef
+__MINGW_USYMBOL(sincos):
+#ifdef __x86_64__
+       .seh_proc __MINGW_USYMBOL(sincos)       /* non-leaf: describe the frame to the unwinder */
+       pushq   %rsi                            /* callee-saved, holds c across the calls */
+       .seh_pushreg %rsi
+       pushq   %rdi                            /* callee-saved, holds s across the calls */
+       .seh_pushreg %rdi
+       subq    $56, %rsp                       /* 32 bytes shadow space + xmm6 slot; %rsp is 16-byte aligned after this */
+       .seh_stackalloc 56
+       movups  %xmm6, 32(%rsp)                 /* xmm6 is callee-saved on Win64 */
+       .seh_savexmm %xmm6, 32
+       .seh_endprologue
+       movq    %r8, %rsi                       /* rsi = c */
+       movq    %rdx, %rdi                      /* rdi = s */
+       movupd  %xmm0, %xmm6                    /* keep x alive across the call to sin */
+       call    sin
+       movsd   %xmm0, (%rdi)                   /* *s = sin(x) */
+       movupd  %xmm6, %xmm0                    /* reload x as the argument for cos */
+       call    cos
+       movsd   %xmm0, (%rsi)                   /* *c = cos(x) */
+       movups  32(%rsp), %xmm6
+       addq    $56, %rsp
+       popq    %rdi
+       popq    %rsi
+       retq
+       .seh_endproc
+#else
+       subl    $44, %esp                       /* outgoing arg at 0(%esp), copy of x at 24(%esp) */
+       fldl    48(%esp)                        /* st(0) = x */
+       fstl    (%esp)                          /* argument for sin */
+       fstpl   24(%esp)                        /* private copy of x for the cos call */
+       call    _sin
+       movl    56(%esp), %eax                  /* eax = s */
+       fstpl   (%eax)                          /* *s = sin(x), pops st(0) */
+       fldl    24(%esp)
+       fstpl   (%esp)                          /* argument for cos */
+       call    _cos
+       movl    60(%esp), %eax                  /* eax = c */
+       fstpl   (%eax)                          /* *c = cos(x), pops st(0) */
+       addl    $44, %esp
+       ret
+#endif
diff --git a/mingw-w64-crt/math/x86/sincosf.S b/mingw-w64-crt/math/x86/sincosf.S
new file mode 100644
index 000000000..21ee30eef
--- /dev/null
+++ b/mingw-w64-crt/math/x86/sincosf.S
@@ -0,0 +1,61 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#include <_mingw_mac.h>
+
+/*
+ * sincosf(x, s, c): store sinf(x) to *s and cosf(x) to *c by calling the
+ * CRT's sinf() and cosf() one after the other.
+ *
+ * x86_64 (Windows x64 ABI): x in %xmm0, s in %rdx, c in %r8.
+ * i386 (cdecl): x at 4(%esp), s at 8(%esp), c at 12(%esp) on entry;
+ * sinf()/cosf() hand back their result in st(0).
+ */
+       .file   "sincosf.S"
+       .text
+       .p2align 4
+       .globl __MINGW_USYMBOL(sincosf)
+       .def    __MINGW_USYMBOL(sincosf);       .scl    2;      .type   32;     .endef
+__MINGW_USYMBOL(sincosf):
+#ifdef __x86_64__
+       .seh_proc __MINGW_USYMBOL(sincosf)      /* non-leaf: describe the frame to the unwinder */
+       pushq   %rsi                            /* callee-saved, holds c across the calls */
+       .seh_pushreg %rsi
+       pushq   %rdi                            /* callee-saved, holds s across the calls */
+       .seh_pushreg %rdi
+       subq    $56, %rsp                       /* 32 bytes shadow space + xmm6 slot; %rsp is 16-byte aligned after this */
+       .seh_stackalloc 56
+       movaps  %xmm6, 32(%rsp)                 /* xmm6 is callee-saved on Win64; slot is 16-byte aligned */
+       .seh_savexmm %xmm6, 32
+       .seh_endprologue
+       movq    %r8, %rsi                       /* rsi = c */
+       movq    %rdx, %rdi                      /* rdi = s */
+       movaps  %xmm0, %xmm6                    /* keep x alive across the call to sinf */
+       call    sinf
+       movss   %xmm0, (%rdi)                   /* *s = sinf(x) */
+       movaps  %xmm6, %xmm0                    /* reload x as the argument for cosf */
+       call    cosf
+       movss   %xmm0, (%rsi)                   /* *c = cosf(x) */
+       movaps  32(%rsp), %xmm6
+       addq    $56, %rsp
+       popq    %rdi
+       popq    %rsi
+       retq
+       .seh_endproc
+#else
+       subl    $28, %esp                       /* outgoing arg slot at 0(%esp) */
+       flds    32(%esp)                        /* st(0) = x */
+       fstps   (%esp)                          /* argument for sinf */
+       call    _sinf
+       movl    36(%esp), %eax                  /* eax = s */
+       fstps   (%eax)                          /* *s = sinf(x), pops st(0) */
+       flds    32(%esp)                        /* reload x from our own incoming argument */
+       fstps   (%esp)                          /* argument for cosf */
+       call    _cosf
+       movl    40(%esp), %eax                  /* eax = c */
+       fstps   (%eax)                          /* *c = cosf(x), pops st(0) */
+       addl    $28, %esp
+       ret
+#endif
-- 
2.43.0



_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to