On UCRT, we pass calls to sin()/cos() to the CRT; the UCRT
versions of these functions are faster than our x87 implementations.

The same also goes for sincos(); calling the UCRT sin() and cos()
separately is almost 3x as fast as calling the x87 sincos()
implementation.

These functions are implemented in assembly, because a plain C
version can be optimized by the compiler back into a plain call
to sincos(), i.e. into a call to the function itself (GCC does this
by default, Clang does it if compiling with -ffast-math); see
3f40dd3254582722761606c7c99d658f952002d9 for earlier precedent for
arm/aarch64.
---
Updated the code to exactly match the output from Clang on x86_64
(the previous version had manual touch-ups to some SSE moves).

Now the x86_64 routines are exactly the output of Clang on x86_64,
and the i386 routines are exactly the output of GCC on i386. (Clang
defaults to using SSE2 on i386 targets.)

Ideally, we wouldn't need to use assembly for things like this;
ideally we should set -fno-builtin for the relevant source files.
However, with automake, it is not easy to set such an option
specifically for one individual file, and it is not possible to
activate specifically -fno-builtin with either a pragma or an
optimize attribute in the source files.
---
 mingw-w64-crt/Makefile.am        |  9 ++++--
 mingw-w64-crt/math/x86/sincos.S  | 47 ++++++++++++++++++++++++++++++++
 mingw-w64-crt/math/x86/sincosf.S | 46 +++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 mingw-w64-crt/math/x86/sincos.S
 create mode 100644 mingw-w64-crt/math/x86/sincosf.S

diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index 232aa7c0f..39fbdcbbc 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -215,6 +215,7 @@ src_msvcrt_common_add_x86=\
   math/x86/atanh.c math/x86/atanhf.c \
   math/x86/ceil.S \
   math/x86/cos.c \
+  math/x86/cossin.c \
   math/x86/exp.c \
   math/x86/exp2.S math/x86/exp2f.S \
   math/x86/expm1.c math/x86/expm1f.c \
@@ -460,6 +461,8 @@ src_ucrtbase32=\
   math/x86/cosf.c \
   math/x86/floorf.S \
   math/x86/fmodf.c \
+  math/x86/sincos.S \
+  math/x86/sincosf.S \
   math/x86/sinf.c \
   math/x86/tanf.c
 
@@ -468,7 +471,9 @@ src_ucrtbase64=\
   $(src_ucrtbase) \
   math/fabsf.c \
   math/nextafterl.c \
-  math/nexttoward.c math/nexttowardf.c
+  math/nexttoward.c math/nexttowardf.c \
+  math/x86/sincos.S \
+  math/x86/sincosf.S
 
 # Files included in libucrt*.a on arm32
 src_ucrtbasearm32=\
@@ -1010,7 +1015,7 @@ src_libmingwex_x86=\
   math/x86/atanhl.c         math/x86/atanl.c          \
   math/x86/ceill.S          math/x86/copysignl.S      \
   math/x86/cos.def.h        math/x86/cosl.c           math/x86/cosl_internal.S  \
-  math/x86/cossin.c         math/x86/cossinl.c        \
+  math/x86/cossinl.c        \
   math/x86/exp2l.S          math/x86/exp.def.h        math/x86/expl.c           math/x86/expm1.def.h      math/x86/expm1l.c         \
   math/x86/fastmath.h       math/x86/floorl.S         \
   math/x86/fmodl.c          math/x86/fucom.c          \
diff --git a/mingw-w64-crt/math/x86/sincos.S b/mingw-w64-crt/math/x86/sincos.S
new file mode 100644
index 000000000..410e13edb
--- /dev/null
+++ b/mingw-w64-crt/math/x86/sincos.S
@@ -0,0 +1,63 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#include <_mingw_mac.h>
+
+/*
+ * void sincos(double x, double *p_sin, double *p_cos)
+ *
+ * Stores the result of sin(x) to *p_sin and of cos(x) to *p_cos by calling
+ * sin() and cos() separately. AT&T syntax.
+ */
+       .file   "sincos.S"
+       .text
+       .p2align 4
+       .globl __MINGW_USYMBOL(sincos)
+       .def    __MINGW_USYMBOL(sincos);        .scl    2;      .type   32;     .endef
+__MINGW_USYMBOL(sincos):
+#ifdef __x86_64__
+       /* Win64 ABI: xmm0 = x, rdx = p_sin, r8 = p_cos. */
+       .seh_proc __MINGW_USYMBOL(sincos)
+       pushq   %rsi
+       .seh_pushreg %rsi
+       pushq   %rdi
+       .seh_pushreg %rdi
+       /* 32 bytes shadow space + 16-byte xmm6 slot + 8 bytes of padding. */
+       subq    $56, %rsp
+       .seh_stackalloc 56
+       movaps  %xmm6, 32(%rsp)
+       .seh_savexmm %xmm6, 32
+       .seh_endprologue
+       movq    %r8, %rsi               /* rsi = p_cos */
+       movq    %rdx, %rdi              /* rdi = p_sin */
+       movaps  %xmm0, %xmm6            /* keep x live across the sin() call */
+       call    sin
+       movsd   %xmm0, (%rdi)           /* *p_sin = sin(x) */
+       movaps  %xmm6, %xmm0
+       call    cos
+       movsd   %xmm0, (%rsi)           /* *p_cos = cos(x) */
+       movaps  32(%rsp), %xmm6
+       addq    $56, %rsp
+       popq    %rdi
+       popq    %rsi
+       retq
+       .seh_endproc
+#else
+       /* After the subl: x at 48(%esp), p_sin at 56(%esp), p_cos at 60(%esp). */
+       subl    $44, %esp
+       fldl    48(%esp)
+       fstl    (%esp)                  /* outgoing argument for sin() */
+       fstpl   24(%esp)                /* keep a copy of x for the cos() call */
+       call    _sin
+       movl    56(%esp), %eax
+       fstpl   (%eax)                  /* *p_sin = sin(x) */
+       fldl    24(%esp)
+       fstpl   (%esp)                  /* outgoing argument for cos() */
+       call    _cos
+       movl    60(%esp), %eax
+       fstpl   (%eax)                  /* *p_cos = cos(x) */
+       addl    $44, %esp
+       ret
+#endif
diff --git a/mingw-w64-crt/math/x86/sincosf.S b/mingw-w64-crt/math/x86/sincosf.S
new file mode 100644
index 000000000..19080b13d
--- /dev/null
+++ b/mingw-w64-crt/math/x86/sincosf.S
@@ -0,0 +1,62 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#include <_mingw_mac.h>
+
+/*
+ * void sincosf(float x, float *p_sin, float *p_cos)
+ *
+ * Stores the result of sinf(x) to *p_sin and of cosf(x) to *p_cos by calling
+ * sinf() and cosf() separately. AT&T syntax.
+ */
+       .file   "sincosf.S"
+       .text
+       .p2align 4
+       .globl __MINGW_USYMBOL(sincosf)
+       .def    __MINGW_USYMBOL(sincosf);       .scl    2;      .type   32;     .endef
+__MINGW_USYMBOL(sincosf):
+#ifdef __x86_64__
+       /* Win64 ABI: xmm0 = x, rdx = p_sin, r8 = p_cos. */
+       .seh_proc __MINGW_USYMBOL(sincosf)
+       pushq   %rsi
+       .seh_pushreg %rsi
+       pushq   %rdi
+       .seh_pushreg %rdi
+       /* 32 bytes shadow space + 16-byte xmm6 slot + 8 bytes of padding. */
+       subq    $56, %rsp
+       .seh_stackalloc 56
+       movaps  %xmm6, 32(%rsp)
+       .seh_savexmm %xmm6, 32
+       .seh_endprologue
+       movq    %r8, %rsi               /* rsi = p_cos */
+       movq    %rdx, %rdi              /* rdi = p_sin */
+       movaps  %xmm0, %xmm6            /* keep x live across the sinf() call */
+       call    sinf
+       movss   %xmm0, (%rdi)           /* *p_sin = sinf(x) */
+       movaps  %xmm6, %xmm0
+       call    cosf
+       movss   %xmm0, (%rsi)           /* *p_cos = cosf(x) */
+       movaps  32(%rsp), %xmm6
+       addq    $56, %rsp
+       popq    %rdi
+       popq    %rsi
+       retq
+       .seh_endproc
+#else
+       /* After the subl: x at 32(%esp), p_sin at 36(%esp), p_cos at 40(%esp). */
+       subl    $28, %esp
+       flds    32(%esp)
+       fstps   (%esp)                  /* outgoing argument for sinf() */
+       call    _sinf
+       movl    36(%esp), %eax
+       fstps   (%eax)                  /* *p_sin = sinf(x) */
+       flds    32(%esp)                /* reload x from the incoming argument */
+       fstps   (%esp)                  /* outgoing argument for cosf() */
+       call    _cosf
+       movl    40(%esp), %eax
+       fstps   (%eax)                  /* *p_cos = cosf(x) */
+       addl    $28, %esp
+       ret
+#endif
-- 
2.43.0



_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to