https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81366
Bug ID: 81366
Summary: pragma omp simd reduction(max:m) not vectorizing
Product: gcc
Version: 8.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: ryan.burn at gmail dot com
Target Milestone: ---
Compiling this code:
###########################################################
// Reproducer: returns the largest value among x[0..n-1] and the initial
// value 0 (m starts at 0, so an empty or all-negative input yields 0).
// NOTE(review): std::max needs <algorithm>; the include is not shown in
// this snippet.
double max(double* x, int n) {
double m = 0;
int i;
// Request SIMD execution of the loop below: i is declared as a linear
// variable and m as a max-reduction variable.
#pragma omp simd linear (i) reduction(max:m)
for (i=0; i<n; ++i)
m = std::max(x[i], m);
return m;
}
###########################################################
with g++ -fopenmp-simd -march=haswell -O3 -S main.cpp
Output of g++ -v:
Using built-in specs.
COLLECT_GCC=/usr/local/bin/g++
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-apple-darwin15.0.0/8.0.0/lto-wrapper
Target: x86_64-apple-darwin15.0.0
Configured with: ../gcc/configure --disable-multilib --enable-languages=c++
--with-gmp=/opt/local --with-libiconv-prefix=/opt/local
Thread model: posix
gcc version 8.0.0 20170610 (experimental) (GCC)
Produces the unvectorized assembly below. Similar code that uses a "+" reduction instead of max does vectorize.
############################################################
LFE1116:
# ----------------------------------------------------------------------
# __Z3maxPdi  --  compiler output for max(double*, int), AT&T syntax.
# Target is x86_64-apple-darwin, so under the System V AMD64 convention:
#   In:  rdi = x (pointer to doubles), esi = n
#   Out: xmm0 = result
# The loop handles one double per iteration (vmovsd / addq $8), i.e. it
# is scalar -- this is the missed vectorization the report is about.
# NOTE(review): the constants lC0 and lC1 are not shown in the listing;
# presumably lC1 is the reduction's private start value and lC0 the
# original m (0.0) -- confirm against the full assembly.
# NOTE(review): the LCFI*/LFB* labels look like markers for call-frame
# (unwind) information -- confirm.
# ----------------------------------------------------------------------
.align 4,0x90
.globl __Z3maxPdi
__Z3maxPdi:
LFB1117:
# Prologue: realign the stack to 32 bytes while keeping a way back.
# r10 = entry rsp + 8 (the address just above the return address).
leaq 8(%rsp), %r10
LCFI6:
# Round rsp down to a 32-byte boundary.
andq $-32, %rsp
# Re-push a copy of the return address (memory at r10-8 is the entry rsp).
pushq -8(%r10)
pushq %rbp
LCFI7:
movq %rsp, %rbp
# Save r10 so the epilogue can rebuild the original rsp from it.
pushq %r10
LCFI8:
# xmm1 = running maximum, seeded from lC1; a copy lives at -48(%rbp).
vmovsd lC1(%rip), %xmm1
vmovsd %xmm1, -48(%rbp)
# Skip the loop entirely when n <= 0.
testl %esi, %esi
jle L13
# eax = n - 1 (the 32-bit write zero-extends into rax), then
# rax = x + (n-1)*8 + 8 = &x[n], the one-past-the-end pointer.
leal -1(%rsi), %eax
leaq 8(%rdi,%rax,8), %rax
# Pad with 0x90 (NOP) bytes so the loop head is aligned.
.align 4,0x90
# Loop head. Invariant: rdi points at the current element, rax at the
# end of the array, and xmm1 holds the running maximum.
L14:
# xmm0 = current element.
vmovsd (%rdi), %xmm0
# Compare the running maximum (xmm1) with the element (xmm0).
vucomisd %xmm0, %xmm1
# Running max <= element, or the comparison is unordered: take the
# element as the new maximum at L20.
jbe L20
# Otherwise keep the running maximum and advance to the next double.
addq $8, %rdi
cmpq %rax, %rdi
jne L14
# Loop exit / epilogue.
L13:
# Reload the running maximum from its stack slot and combine it with
# the constant lC0 using a scalar max; the result is returned in xmm0.
vmovsd -48(%rbp), %xmm2
vmaxsd lC0(%rip), %xmm2, %xmm0
popq %r10
LCFI9:
popq %rbp
# rsp = r10 - 8, i.e. the entry rsp, which points at the real return
# address again.
leaq -8(%r10), %rsp
LCFI10:
ret
.align 4,0x90
# New-maximum path, reached from the jbe in the loop above.
L20:
LCFI11:
# Advance to the next element and record the element as the new
# running maximum in the stack slot.
addq $8, %rdi
vmovsd %xmm0, -48(%rbp)
# Array exhausted: go to the epilogue.
cmpq %rax, %rdi
je L13
# Otherwise make the element the in-register running maximum and
# continue the loop.
vmovapd %xmm0, %xmm1
jmp L14
############################################################