https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66115
Bug ID: 66115
Summary: When using -O0 with -mavx the compiler uses aligned
loads for possibly unaligned function parameters
Product: gcc
Version: unknown
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: carloscastro10 at hotmail dot com
Target Milestone: ---
When compiling with -mavx, memory operands to the VEX-encoded forms of SSE
instructions are allowed to be unaligned. Consider the code below:
#include <immintrin.h>
using namespace std;
int main()
{
int *a = new int[5];
__m128i sse0 = _mm_setzero_si128();
sse0 = _mm_avg_epu8(sse0,*(__m128i*)(a+1)); // Unaligned parameter
_mm_storeu_si128((__m128i*)(a),sse0);
delete [] a;
}
When compiled with "-O3 -mavx", the compiler outputs
the following (correct) code:
main:
subq $8, %rsp
movl $20, %edi
call operator new[](unsigned long)
vpxor %xmm0, %xmm0, %xmm0
movq %rax, %rdi
vpavgb 4(%rax), %xmm0, %xmm0
vmovdqu %xmm0, (%rax)
call operator delete[](void*)
xorl %eax, %eax
addq $8, %rsp
ret
However, when compiled with "-O0 -mavx", the compiler incorrectly uses an
aligned load instruction (vmovdqa) for the possibly unaligned memory operand.
This results in a segfault:
main:
pushq %rbp
movq %rsp, %rbp
subq $80, %rsp
movl $20, %edi
call operator new[](unsigned long)
movq %rax, -16(%rbp)
vpxor %xmm0, %xmm0, %xmm0
vmovdqa %xmm0, -80(%rbp)
movq -16(%rbp), %rax
addq $4, %rax
vmovdqa (%rax), %xmm0
vmovdqa -80(%rbp), %xmm1
vmovdqa %xmm1, -64(%rbp)
vmovdqa %xmm0, -48(%rbp)
vmovdqa -48(%rbp), %xmm0
vmovdqa -64(%rbp), %xmm1
vpavgb %xmm0, %xmm1, %xmm0
vmovdqa %xmm0, -80(%rbp)
movq -16(%rbp), %rax
movq %rax, -8(%rbp)
vmovdqa -80(%rbp), %xmm0
vmovdqa %xmm0, -32(%rbp)
vmovdqa -32(%rbp), %xmm0
movq -8(%rbp), %rax
vmovdqu %xmm0, (%rax)
cmpq $0, -16(%rbp)
je .L2
movq -16(%rbp), %rax
movq %rax, %rdi
call operator delete[](void*)
.L2:
movl $0, %eax
leave
ret
This problem is present in all versions of gcc all the way to 5.1