https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71300
Bug ID: 71300
Summary: Vector ABI bug for some AVX vectorized variants
Product: gcc
Version: 4.9.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: andrew.n.senkevich at gmail dot com
Target Milestone: ---
Hi,
according to the Vector ABI, the AVX-ISA vectorized variant of
#pragma omp declare simd notinbranch
void callee(double, double*);
expects ymm0 filled with 4 doubles and ymm1 filled with 4 double* values.
But in reality the double* values are passed in xmm1 and xmm2, which leads to an
important ABI issue.
-bash-4.2$ cat test.cc
#pragma omp declare simd notinbranch
extern void callee(double a, double* b);
#define VLEN 4
typedef double __attribute__((vector_size(8 * VLEN))) vec;
vec x, r;
int main()
{
for (int i = 0; i < VLEN; i++) x[i] = i;
#pragma omp simd
for (int i = 0; i < VLEN; i++) callee(x[i], &r[i]);
return (int)r[VLEN-1];
}
g++ -O1 -fopenmp -ffast-math test.cc -mavx -c
-bash-4.2$ objdump -d test.o
test.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <main>:
0: 4c 8d 54 24 08 lea 0x8(%rsp),%r10
5: 48 83 e4 e0 and $0xffffffffffffffe0,%rsp
9: 41 ff 72 f8 pushq -0x8(%r10)
d: 55 push %rbp
e: 48 89 e5 mov %rsp,%rbp
11: 41 52 push %r10
13: 48 83 ec 28 sub $0x28,%rsp
17: 48 c7 05 00 00 00 00 movq $0x0,0x0(%rip) # 22 <main+0x22>
1e: 00 00 00 00
22: c5 fb 10 1d 00 00 00 vmovsd 0x0(%rip),%xmm3 # 2a <main+0x2a>
29: 00
2a: c5 fb 11 1d 00 00 00 vmovsd %xmm3,0x0(%rip) # 32 <main+0x32>
31: 00
32: c5 fb 10 25 00 00 00 vmovsd 0x0(%rip),%xmm4 # 3a <main+0x3a>
39: 00
3a: c5 fb 11 25 00 00 00 vmovsd %xmm4,0x0(%rip) # 42 <main+0x42>
41: 00
42: c5 fb 10 2d 00 00 00 vmovsd 0x0(%rip),%xmm5 # 4a <main+0x4a>
49: 00
4a: c5 fb 11 2d 00 00 00 vmovsd %xmm5,0x0(%rip) # 52 <main+0x52>
51: 00
52: c5 fb 12 0d 00 00 00 vmovddup 0x0(%rip),%xmm1 # 5a
<main+0x5a>
59: 00
5a: c5 f9 28 3d 00 00 00 vmovapd 0x0(%rip),%xmm7 # 62 <main+0x62>
61: 00
62: c5 f8 29 7d d0 vmovaps %xmm7,-0x30(%rbp)
67: c5 f9 28 05 00 00 00 vmovapd 0x0(%rip),%xmm0 # 6f <main+0x6f>
6e: 00
6f: c5 f8 29 45 e0 vmovaps %xmm0,-0x20(%rbp)
74: c5 f1 d4 15 00 00 00 vpaddq 0x0(%rip),%xmm1,%xmm2 # 7c
<main+0x7c>
7b: 00
7c: c5 f1 d4 0d 00 00 00 vpaddq 0x0(%rip),%xmm1,%xmm1 # 84
<main+0x84>
83: 00
84: c5 fd 28 45 d0 vmovapd -0x30(%rbp),%ymm0
89: e8 00 00 00 00 callq 8e <main+0x8e>
8e: c5 fb 2c 05 00 00 00 vcvttsd2si 0x0(%rip),%eax # 96
<main+0x96>
95: 00
96: 48 83 c4 28 add $0x28,%rsp
9a: 41 5a pop %r10
9c: 5d pop %rbp
9d: 49 8d 62 f8 lea -0x8(%r10),%rsp
a1: c3 retq