https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85633

            Bug ID: 85633
           Summary: [8 Regression] reorders function ignoring fpu
                    exception state
           Product: gcc
           Version: 8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jtaylor.debian at googlemail dot com
  Target Milestone: ---

gcc-8 seems to ignore the global FPU exception state when reordering function
calls. In this example it moves the call to fpu_invalid_set(), which is _not_
marked as const, before the last _mm_min_ps call, which can set the FPU invalid
exception flag.

#include <emmintrin.h>
/* External function that checks the FPU exception state; deliberately NOT
 * declared const/pure, so the compiler has to treat it as reading global
 * state. */
int fpu_invalid_set(void);
/* External helper, defined elsewhere; presumably collapses the vector to a
 * single float (definition not shown in this report). */
float reduce(__m128 a);

/*
 * Reproducer: lane-wise minimum over a[0..63], then a check of the FPU
 * exception state.
 *
 * Returns 1 if fpu_invalid_set() reports non-zero, otherwise reduce(c1).
 * The final _mm_min_ps must execute before fpu_invalid_set() is called,
 * because it can itself raise the FPU invalid exception.
 */
float fun(float *a)
{
    /* Two independent accumulators, every lane initialised to 1000. */
    __m128 c1 = _mm_set_ps1(1000);
    __m128 c2 = _mm_set_ps1(1000);
    /* Each iteration consumes 8 floats: two unaligned 4-float loads. */
    for (int i=0; i < 64; i+=8) {
        __m128 x1 = _mm_loadu_ps(&a[i]);
        __m128 x2 = _mm_loadu_ps(&a[i+4]);
        c1 = _mm_min_ps(c1, x1);
        c2 = _mm_min_ps(c2, x2);
    }
    /* Combine the two accumulators. Per this report, gcc 8 sinks this min
     * past the fpu_invalid_set() call below, so the check misses any
     * exception it raises. */
    c1 = _mm_min_ps(c1, c2);
    if (fpu_invalid_set()) {
        return 1;
    }
    else {
        return reduce(c1);
    }
}

gcc -O2 -c fun.c
objdump -d fun.o
0000000000000000 <fun>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   0f 28 0d 00 00 00 00    movaps 0x0(%rip),%xmm1        # b <fun+0xb>
   b:   48 8d 87 00 01 00 00    lea    0x100(%rdi),%rax
  12:   0f 28 c1                movaps %xmm1,%xmm0
  15:   0f 1f 00                nopl   (%rax)
  18:   0f 10 17                movups (%rdi),%xmm2
  1b:   0f 10 5f 10             movups 0x10(%rdi),%xmm3
  1f:   48 83 c7 20             add    $0x20,%rdi
  23:   0f 5d c2                minps  %xmm2,%xmm0
  26:   0f 5d cb                minps  %xmm3,%xmm1
  29:   48 39 f8                cmp    %rdi,%rax
  2c:   75 ea                   jne    18 <fun+0x18>
  2e:   0f 29 44 24 10          movaps %xmm0,0x10(%rsp)
  33:   0f 29 0c 24             movaps %xmm1,(%rsp)
  37:   e8 00 00 00 00          callq  3c <fun+0x3c> <<<<<<<<<<<< call too early
  3c:   0f 28 0c 24             movaps (%rsp),%xmm1
  40:   0f 28 44 24 10          movaps 0x10(%rsp),%xmm0
  45:   85 c0                   test   %eax,%eax
  47:   74 17                   je     60 <fun+0x60>
  49:   f3 0f 10 05 00 00 00    movss  0x0(%rip),%xmm0        # 51 <fun+0x51>
  50:   00 
  51:   48 83 c4 28             add    $0x28,%rsp
  55:   c3                      retq   
  56:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  5d:   00 00 00 
  60:   0f 5d c1                minps  %xmm1,%xmm0 <<<<<<<<<<< min at wrong place
  63:   48 83 c4 28             add    $0x28,%rsp
  67:   e9 00 00 00 00          jmpq   6c <fun+0x6c>


gcc 7 and earlier execute the minps instruction before calling the function
that checks the fpu state.

Reply via email to