On Wed, 5 Mar 2025, Jacek Caban wrote:

Based on Wine code by Piotr Caban and Alexandre Julliard, who granted 
permission to use it under the mingw-w64 license.

Unlike earlier versions, msvcr120 and UCRT implement fenv.h, but their 
representation differs from what mingw-w64
has used so far. fenv_t has a target-independent layout and flags, requiring 
translation to and from machine-specific
flags. Generic helpers handle this conversion.

Side note nitpick: The commit message here has longer lines than the common git standard; it would be more readable in 80-character-wide terminals if it were word-wrapped to the usual git commit message width :-)


On Windows x86_64, these functions operate only on SSE registers, ignoring x87. 
Since mingw long doubles use x87,
they would be unaffected by these calls. This patch does not follow that 
behavior; instead, it modifies x87 registers
as well, effectively following the 32-bit x86 code path.

Additionally, in 32-bit versions older than msvcr71, functions like _control87, 
__control87_2, and _statusfp did not
support SSE, even though they would otherwise be useful as helpers. This patch 
provides variants that operate on both
SSE and x87 for x86 and x86_64 (though we could still consider a different 
approach for ARM targets).

I left FE_*_ENV handling unchanged, but we could consider making them proper 
pointers to structs filled with the
appropriate values instead.

This patch also adds support for ARM64EC. The ARM64EC version currently ignores 
x87, as 80-bit long doubles are not
supported anyway.
---
v2: Simplify fenv_encode, spotted by Piotr.

mingw-w64-crt/Makefile.am            |   4 +-
mingw-w64-crt/include/internal.h     | 115 +++++++++
mingw-w64-crt/misc/feclearexcept.c   |  40 +--
mingw-w64-crt/misc/fegetenv.c        |  31 +--
mingw-w64-crt/misc/fegetexceptflag.c |  29 +--
mingw-w64-crt/misc/fegetround.c      |  17 +-
mingw-w64-crt/misc/feholdexcept.c    |  23 +-
mingw-w64-crt/misc/feraiseexcept.c   |  32 +--
mingw-w64-crt/misc/fesetenv.c        | 103 ++++----
mingw-w64-crt/misc/fesetexceptflag.c |  42 +--
mingw-w64-crt/misc/fesetround.c      |  43 +--
mingw-w64-crt/misc/fetestexcept.c    |  23 +-
mingw-w64-crt/misc/feupdateenv.c     |  10 +-
mingw-w64-crt/misc/mingw_controlfp.c |  53 ++++
mingw-w64-crt/misc/mingw_setfp.c     | 373 +++++++++++++++++++++++++++
mingw-w64-headers/crt/fenv.h         |  72 ++----
16 files changed, 674 insertions(+), 336 deletions(-)
create mode 100644 mingw-w64-crt/misc/mingw_controlfp.c
create mode 100644 mingw-w64-crt/misc/mingw_setfp.c

diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index 186e17979..7fdfe61cf 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -977,6 +977,7 @@ src_libmingwex=\
  misc/dirname.c \
  misc/feclearexcept.c   misc/fegetenv.c            misc/fegetexceptflag.c  
misc/fegetround.c            misc/feholdexcept.c    \
  misc/feraiseexcept.c   misc/fesetenv.c            misc/fesetexceptflag.c  
misc/fesetround.c            misc/fetestexcept.c    \
+  misc/mingw_setfp.c \
  misc/feupdateenv.c     misc/ftruncate.c           misc/fwide.c            
misc/getlogin.c              misc/getopt.c          \
  misc/gettimeofday.c    misc/__mingw_has_sse.c      \
  misc/mempcpy.c         misc/mingw-aligned-malloc.c \
@@ -1038,7 +1039,8 @@ src_libmingwex_x86=\
  math/x86/remainderl.S     math/x86/remquol.S        \
  math/x86/scalbn.S         math/x86/scalbnf.S        math/x86/scalbnl.S        
\
  math/x86/sin.def.h        \
-  math/x86/sinl.c           math/x86/sinl_internal.S  math/x86/tanl.S
+  math/x86/sinl.c           math/x86/sinl_internal.S  math/x86/tanl.S \
+  misc/mingw_controlfp.c


src_libmingwex32=$(src_libmingwex_x86)
diff --git a/mingw-w64-crt/include/internal.h b/mingw-w64-crt/include/internal.h
index b3f1186e1..e9ba79fb3 100644
--- a/mingw-w64-crt/include/internal.h
+++ b/mingw-w64-crt/include/internal.h
@@ -14,6 +14,7 @@ extern "C" {
#endif

#include <limits.h>
+#include <fenv.h>
#include <windows.h>

#pragma pack(push,_CRT_PACKING)
@@ -154,6 +155,120 @@ extern "C" {
# define __mingw_has_sse()  0
#endif

+#if defined(__i386__) || defined(__x86_64__)
+enum fenv_masks
+{
+    FENV_X_INVALID = 0x00100010,
+    FENV_X_DENORMAL = 0x00200020,
+    FENV_X_ZERODIVIDE = 0x00080008,
+    FENV_X_OVERFLOW = 0x00040004,
+    FENV_X_UNDERFLOW = 0x00020002,
+    FENV_X_INEXACT = 0x00010001,
+    FENV_X_AFFINE = 0x00004000,
+    FENV_X_UP = 0x00800200,
+    FENV_X_DOWN = 0x00400100,
+    FENV_X_24 = 0x00002000,
+    FENV_X_53 = 0x00001000,
+    FENV_Y_INVALID = 0x10000010,
+    FENV_Y_DENORMAL = 0x20000020,
+    FENV_Y_ZERODIVIDE = 0x08000008,
+    FENV_Y_OVERFLOW = 0x04000004,
+    FENV_Y_UNDERFLOW = 0x02000002,
+    FENV_Y_INEXACT = 0x01000001,
+    FENV_Y_UP = 0x80000200,
+    FENV_Y_DOWN = 0x40000100,
+    FENV_Y_FLUSH = 0x00000400,
+    FENV_Y_FLUSH_SAVE = 0x00000800
+};
+
+/* encodes x87/sse control/status word in ulong */
+static inline unsigned long fenv_encode(unsigned int x, unsigned int y)
+{
+    unsigned long ret = 0;

It's probably clearer elsewhere, but is it the case that x is a fenv word for x87, and y is one for SSE?

It's also kinda hard to look at the constant definitions, as part of each value is identical between FENV_X_* and FENV_Y_* (the lower bits), but part of it isn't (the upper bits). For people not intimately familiar with the x86-isms, it would be nice to have a comment somewhere, but I'm not sure where it would be best placed.

+
+    if (x & _EM_INVALID) ret |= FENV_X_INVALID;
+    if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
+    if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
+    if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
+    if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
+    if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
+    if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
+    if (x & _RC_UP) ret |= FENV_X_UP;
+    if (x & _RC_DOWN) ret |= FENV_X_DOWN;
+    if (x & _PC_24) ret |= FENV_X_24;
+    if (x & _PC_53) ret |= FENV_X_53;
+
+    if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
+    if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
+    if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
+    if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
+    if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
+    if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
+    if (y & _RC_UP) ret |= FENV_Y_UP;
+    if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
+    if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
+    if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
+
+    return ret;
+}
+
+/* decodes x87/sse control/status word, returns FALSE on error */
+static inline BOOL fenv_decode(unsigned long enc, unsigned int *x, unsigned 
int *y)
+{
+    *x = *y = 0;
+    if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
+    if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
+    if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
+    if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
+    if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
+    if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
+    if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
+    if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
+    if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
+    if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
+    if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
+
+    if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
+    if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
+    if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
+    if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
+    if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
+    if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
+    if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
+    if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
+    if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
+    if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= 
_DN_FLUSH_OPERANDS_SAVE_RESULTS;
+
+    return fenv_encode(*x, *y) == enc;
+}
+#else
+static inline unsigned long fenv_encode(unsigned int x, unsigned int y)
+{
+    if (y & _EM_DENORMAL)
+        y = (y & ~_EM_DENORMAL) | 0x20;

This could really warrant a comment explaining what it does, as 0x20 isn't a named constant.

+
+    return x | y;
+}
+
+static inline BOOL fenv_decode(unsigned long enc, unsigned int *x, unsigned 
int *y)
+{
+    if (enc & 0x20)
+        enc = (enc & ~0x20) | _EM_DENORMAL;
+
+    *x = *y = enc;
+    return TRUE;
+}
+#endif
+
+void __mingw_setfp( unsigned int *cw, unsigned int cw_mask, unsigned int *sw, 
unsigned int sw_mask );
+void __mingw_setfp_sse( unsigned int *cw, unsigned int cw_mask, unsigned int 
*sw, unsigned int sw_mask );
+#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
+unsigned int __mingw_controlfp(unsigned int newval, unsigned int mask);
+int __mingw_control87_2(unsigned int, unsigned int, unsigned int *, unsigned 
int *);
+#else
+#define __mingw_controlfp _controlfp
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/mingw-w64-crt/misc/feclearexcept.c 
b/mingw-w64-crt/misc/feclearexcept.c
index a5ab0f095..5fa811fbe 100644
--- a/mingw-w64-crt/misc/feclearexcept.c
+++ b/mingw-w64-crt/misc/feclearexcept.c
@@ -4,44 +4,18 @@
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */

-#include <fenv.h>
#include <internal.h>

/* 7.6.2.1
   The feclearexcept function clears the supported exceptions
   represented by its argument.  */

-int feclearexcept (int excepts)
+int feclearexcept(int flags)
{
-  fenv_t _env;
-#if defined(_ARM_) || defined(__arm__)
-  __asm__ volatile ("fmrx %0, FPSCR" : "=r" (_env));
-  _env.__cw &= ~(excepts & FE_ALL_EXCEPT);
-  __asm__ volatile ("fmxr FPSCR, %0" : : "r" (_env));
-#elif defined(_ARM64_) || defined(__aarch64__)
-  unsigned __int64 fpcr;
-  (void) _env;
-  __asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
-  fpcr &= ~(excepts & FE_ALL_EXCEPT);
-  __asm__ volatile ("msr fpcr, %0" : : "r" (fpcr));
-#else
-  int _mxcsr;
-  if (excepts == FE_ALL_EXCEPT)
-    {
-      __asm__ volatile ("fnclex");
-    }
-  else
-    {
-      __asm__ volatile ("fnstenv %0" : "=m" (_env));
-      _env.__status_word &= ~(excepts & FE_ALL_EXCEPT);
-      __asm__ volatile ("fldenv %0" : : "m" (_env));
-    }
-  if (__mingw_has_sse ())
-    {
-      __asm__ volatile ("stmxcsr %0" : "=m" (_mxcsr));
-      _mxcsr &= ~(((excepts & FE_ALL_EXCEPT)));
-      __asm__ volatile ("ldmxcsr %0" : : "m" (_mxcsr));
-    }
-#endif /* defined(_ARM_) || defined(__arm__) || defined(_ARM64_) || 
defined(__aarch64__) */
-  return (0);
+    fenv_t env;
+
+    fegetenv(&env);
+    flags &= FE_ALL_EXCEPT;
+    env._Fe_stat &= ~fenv_encode(flags, flags);
+    return fesetenv(&env);
}
diff --git a/mingw-w64-crt/misc/fegetenv.c b/mingw-w64-crt/misc/fegetenv.c
index e17fd491c..cadaf4572 100644
--- a/mingw-w64-crt/misc/fegetenv.c
+++ b/mingw-w64-crt/misc/fegetenv.c
@@ -4,34 +4,25 @@
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */

-#include <fenv.h>
#include <internal.h>

/* 7.6.4.1
   The fegetenv function stores the current floating-point environment
   in the object pointed to by envp.  */

-int fegetenv (fenv_t * envp)
+int fegetenv(fenv_t *env)
{
-#if defined(_ARM_) || defined(__arm__)
-  __asm__ volatile ("fmrx %0, FPSCR" : "=r" (*envp));
-#elif defined(_ARM64_) || defined(__aarch64__)
-  unsigned __int64 fpcr;
-  __asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
-  envp->__cw = fpcr;
+#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
+    unsigned int x87, sse;
+    __mingw_control87_2(0, 0, &x87, &sse);
+    env->_Fe_ctl = fenv_encode(x87, sse);
+    __mingw_setfp(NULL, 0, &x87, 0);
+    __mingw_setfp_sse(NULL, 0, &sse, 0);
+    env->_Fe_stat = fenv_encode(x87, sse);
#else
-  __asm__ __volatile__ ("fnstenv %0;": "=m" (*envp));
- /* fnstenv sets control word to non-stop for all exceptions, so we
-    need to reload our env to restore the original mask.  */
-  __asm__ __volatile__ ("fldenv %0" : : "m" (*envp));
-  if (__mingw_has_sse ())
-    {
-      int _mxcsr;
-      __asm__ __volatile__ ("stmxcsr %0" : "=m" (_mxcsr));
-      envp->__unused0 = (((unsigned int) _mxcsr) >> 16);
-      envp->__unused1 = (((unsigned int) _mxcsr) & 0xffff);
-    }
-#endif /* defined(_ARM_) || defined(__arm__) || defined(_ARM64_) || 
defined(__aarch64__) */
+    env->_Fe_ctl = fenv_encode(0, _controlfp(0, 0));
+    env->_Fe_stat = fenv_encode(0, _statusfp());

This bit feels a bit asymmetrical; in fesetenv we use our own __mingw_setfp for setting things, but in fegetenv we just use _controlfp() and _statusfp() from the CRT. (This is also a bit of a functional change, as we've been entirely detached from the CRT's handling of this bit so far.)

It's probably fine though, but I'm wondering if this would be clearer if we had our own __mingw_controlfp/__mingw_setfp at first for ARM/AArch64 too, so we'd have the full loop of both setting and getting in our own code, just like before. Then we could convert __mingw_controlfp into a call to the CRT's _controlfp in a separate step.

It doesn't make this patch smaller or easier, but it makes it marginally easier to grasp the full picture. The separate step of taking CRT's _controlfp and _statusfp into use would then be easier to consider on its own. I presume the UCRT's _controlfp/_statusfp works as it should, but e.g. for msvcrt.dll I'm not sure if anybody has tested them? (I did testrun these patches with both UCRT and msvcrt on all 4 architectures, and it does seem to run fine.)


Aside from that, I think this mostly looks good. Most of the fe* functions are kinda straightforward, and everything boils down to calls to the internal helper functions. As this is based on Wine, with quite well tested implementations, I'm fairly certain that those aspects are ok.

// Martin



_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to