https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66115

            Bug ID: 66115
           Summary: When using -O0 with -mavx the compiler uses aligned
                    loads for possibly unaligned function parameters
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: carloscastro10 at hotmail dot com
  Target Milestone: ---

When compiling with -mavx, memory operands to SSE instructions are allowed to be
unaligned. Consider the code below:

#include <immintrin.h>

using namespace std;

int main()
{
  int *a = new int[5];
  __m128i sse0 = _mm_setzero_si128();
  sse0 = _mm_avg_epu8(sse0,*(__m128i*)(a+1)); // Unaligned parameter
  _mm_storeu_si128((__m128i*)(a),sse0);
  delete [] a;
}

When compiled with the options "-O3 -mavx", the compiler outputs the following
(correct) code:

main:
        subq    $8, %rsp
        movl    $20, %edi
        call    operator new[](unsigned long)
        vpxor   %xmm0, %xmm0, %xmm0
        movq    %rax, %rdi
        vpavgb  4(%rax), %xmm0, %xmm0
        vmovdqu %xmm0, (%rax)
        call    operator delete[](void*)
        xorl    %eax, %eax
        addq    $8, %rsp
        ret

However, when compiled with "-O0 -mavx", the compiler incorrectly uses an
aligned load (vmovdqa) for the unaligned operand. This results in a segfault:

main:
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $80, %rsp
        movl    $20, %edi
        call    operator new[](unsigned long)
        movq    %rax, -16(%rbp)
        vpxor   %xmm0, %xmm0, %xmm0
        vmovdqa %xmm0, -80(%rbp)
        movq    -16(%rbp), %rax
        addq    $4, %rax
        vmovdqa (%rax), %xmm0
        vmovdqa -80(%rbp), %xmm1
        vmovdqa %xmm1, -64(%rbp)
        vmovdqa %xmm0, -48(%rbp)
        vmovdqa -48(%rbp), %xmm0
        vmovdqa -64(%rbp), %xmm1
        vpavgb  %xmm0, %xmm1, %xmm0
        vmovdqa %xmm0, -80(%rbp)
        movq    -16(%rbp), %rax
        movq    %rax, -8(%rbp)
        vmovdqa -80(%rbp), %xmm0
        vmovdqa %xmm0, -32(%rbp)
        vmovdqa -32(%rbp), %xmm0
        movq    -8(%rbp), %rax
        vmovdqu %xmm0, (%rax)
        cmpq    $0, -16(%rbp)
        je      .L2
        movq    -16(%rbp), %rax
        movq    %rax, %rdi
        call    operator delete[](void*)
.L2:
        movl    $0, %eax
        leave
        ret

This problem is present in all versions of gcc up to 5.1.

Reply via email to