https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98607
Bug ID: 98607 Summary: GDC merging computations but rounding mode has changed Product: gcc Version: unknown Status: UNCONFIRMED Severity: normal Priority: P3 Component: d Assignee: ibuclaw at gdcproject dot org Reporter: guillaume.piolat at gmail dot com Target Milestone: --- Created attachment 49923 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=49923&action=edit Repro file # Description It seems GCC merges common sub-expressions even though they are separated by a __builtin_ia32_ldmxcsr call, which changes the SSE rounding mode. # Compiler version GDC 10.2 # Godbolt See on Godbolt: https://godbolt.org/z/c7EKfY Notice that only one cvtps2dq instruction is generated. # Reproduce failure Run this D program with GDC 10.2: -------------- repro.d --------------------- import core.simd; import gcc.builtins; alias __m128 = float4; alias __m128i = int4; alias __m128d = double2; void bug() { uint savedRounding = _MM_GET_ROUNDING_MODE(); _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); __m128i A = _mm_cvtps_epi32(_mm_setr_ps(1.4f, -2.1f, 53.5f, -2.9f)); assert(A.array == [1, -2, 54, -3]); // GCC might merge this branch with above! Despite _MM_SET_ROUNDING_MODE // not being pure. _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN); A = _mm_cvtps_epi32(_mm_setr_ps(1.4f, -2.1f, 53.5f, -2.9f)); // that whole expression is computed once, but rounding mode has changed assert(A.array == [1, -3, 53, -3]); } uint _mm_getcsr() { return __builtin_ia32_stmxcsr(); } void _mm_setcsr(uint controlWord) @trusted { __builtin_ia32_ldmxcsr(controlWord); } __m128i _mm_cvtps_epi32 (__m128 a) { return __builtin_ia32_cvtps2dq(a); } enum int _MM_ROUND_NEAREST = 0x0000; /// MXCSR Rounding mode. enum int _MM_ROUND_DOWN = 0x2000; ///ditto enum int _MM_ROUND_UP = 0x4000; ///ditto enum int _MM_ROUND_TOWARD_ZERO = 0x6000; ///ditto enum int _MM_ROUND_MASK = 0x6000; /// MXCSR Rounding mode mask. 
/// Returns the value of `_mm_getcsr()` masked with `_MM_ROUND_MASK`,
/// i.e. only the rounding-mode bits of the control word.
uint _MM_GET_ROUNDING_MODE() { return _mm_getcsr() & _MM_ROUND_MASK; }
/// Read-modify-write of the control word: reads it with `_mm_getcsr()`,
/// clears the bits selected by `_MM_ROUND_MASK`, ORs in `_MM_ROUND_xxxx`,
/// and writes the result back with `_mm_setcsr()`.
/// Params: _MM_ROUND_xxxx = bits to OR in; it is not masked here, so the
/// caller is presumably expected to pass one of the `_MM_ROUND_*` constants.
void _MM_SET_ROUNDING_MODE(int _MM_ROUND_xxxx) { _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | _MM_ROUND_xxxx); }
/// Builds a 4-float vector from the arguments: they are stored in argument
/// order (e3, e2, e1, e0) into a local `float[4]`, which is then loaded
/// through `loadUnaligned!(float4)`.
__m128 _mm_setr_ps (float e3, float e2, float e1, float e0) { float[4] result = [e3, e2, e1, e0]; return loadUnaligned!(float4)(result.ptr); }
/// Template helper, constrained to `Vec == float4`, that returns
/// `__builtin_ia32_loadups(pvec)`.
/// Params: pvec = pointer passed straight to the builtin; presumably must
/// point to at least four readable floats (not checked here; the function
/// is marked `@trusted`).
float4 loadUnaligned(Vec)(const(float)* pvec) @trusted if (is(Vec == float4)) { return __builtin_ia32_loadups(pvec); }
------------------------------------------------------