The following testcase

#include <emmintrin.h>

/*
 * Overlays an SSE2 integer vector with an array of four ints, so the same
 * storage can be accessed either as a whole vector (v) or per element (m).
 */
typedef union {
    __m128i v;  /* vector view of the storage */
    int m[4];   /* element-wise view of the same storage */
} VectorUnion;

/*
 * Returns, by value, a VectorUnion whose vector member holds the result of
 * _mm_set1_epi32(1), i.e. the value 1 in each 32-bit element.
 */
VectorUnion one()
{
    /* A brace initializer for a union initializes its first member (v). */
    VectorUnion r = { _mm_set1_epi32(1) };
    return r;
}

/*
 * Fetches the union from one() and compares its vector member with itself.
 * Returns 0 when the byte mask of the comparison result is 0xffff (every
 * element compared equal), and 1 otherwise.
 */
int main()
{
    VectorUnion x = one();
    /*
     * _mm_cmpeq_epi32 yields all-ones in each 32-bit element that compares
     * equal; _mm_movemask_epi8 gathers the top bit of each of the 16 bytes,
     * so a fully equal comparison produces 0xffff.
     */
    if (0xffff == _mm_movemask_epi8(_mm_cmpeq_epi32(x.v, x.v))) {
        return 0;
    }
    return 1;
}

compiles (-Wall -Wextra -O2 -mssse3) to

00000000004004d0 <main>:
  4004d0:       66 0f 6f 05 38 01 00 00         movdqa 0x138(%rip),%xmm0
  4004d8:       66 0f 7f 44 24 d8       movdqa %xmm0,-0x28(%rsp)                
  4004de:       48 8b 44 24 d8          mov    -0x28(%rsp),%rax                 
  4004e3:       48 89 44 24 e8          mov    %rax,-0x18(%rsp)                 
  4004e8:       48 8b 44 24 e0          mov    -0x20(%rsp),%rax                 
  4004ed:       48 89 44 24 f0          mov    %rax,-0x10(%rsp)                 
  4004f2:       66 0f 6f 44 24 e8       movdqa -0x18(%rsp),%xmm0                
  4004f8:       66 0f 76 c0             pcmpeqd %xmm0,%xmm0                     
  4004fc:       66 0f d7 c0             pmovmskb %xmm0,%eax                     

As can be seen, the xmm0 register is stored on the stack, then copied via two
64-bit moves to another stack location and then, from there, loaded back into
xmm0. The values on the stack are not needed/used later on.

I expected gcc to notice that those moves are redundant and produce code like

movdqa 0x138(%rip),%xmm0
pcmpeqd %xmm0,%xmm0                                                   
pmovmskb %xmm0,%eax


-- 
           Summary: missed optimization when using union of __m128i and
                    int[4]
           Product: gcc
           Version: 4.3.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: kretz at kde dot org
 GCC build triplet: x86_64-unknown-linux-gnu
  GCC host triplet: x86_64-unknown-linux-gnu
GCC target triplet: x86_64-unknown-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40122

Reply via email to