https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86144
Bug ID: 86144
Summary: GCC is not generating vector math calls to svml/acml
functions
Product: gcc
Version: 8.1.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: vekumar at gcc dot gnu.org
Target Milestone: ---
As per the GCC 8.1.0 manual:
---snip--
-mveclibabi=type
Specifies the ABI type to use for vectorizing intrinsics using an external
library. Supported values for type are ‘svml’ for the Intel short vector math
library and ‘acml’ for the AMD math core library. To use this option, both
-ftree-vectorize and -funsafe-math-optimizations have to be enabled, and an
SVML or ACML ABI-compatible library must be specified at link time.
GCC currently emits calls to vmldExp2, vmldLn2, vmldLog102, vmldLog102,
vmldPow2, vmldTanh2, vmldTan2, vmldAtan2, vmldAtanh2, vmldCbrt2, vmldSinh2,
vmldSin2, vmldAsinh2, vmldAsin2, vmldCosh2, vmldCos2, vmldAcosh2, vmldAcos2,
vmlsExp4, vmlsLn4, vmlsLog104, vmlsLog104, vmlsPow4, vmlsTanh4, vmlsTan4,
vmlsAtan4, vmlsAtanh4, vmlsCbrt4, vmlsSinh4, vmlsSin4, vmlsAsinh4, vmlsAsin4,
vmlsCosh4, vmlsCos4, vmlsAcosh4 and vmlsAcos4 for corresponding function type
when -mveclibabi=svml is used, and __vrd2_sin, __vrd2_cos, __vrd2_exp,
__vrd2_log, __vrd2_log2, __vrd2_log10, __vrs4_sinf, __vrs4_cosf, __vrs4_expf,
__vrs4_logf, __vrs4_log2f, __vrs4_log10f and __vrs4_powf for the corresponding
function type when -mveclibabi=acml is used.
--snip--
#include <math.h>
/* Reproducer for this report: stores exp(B[i]) into A[i] for every i in
   [0, size) and returns A[0].  A and B are declared __restrict__. */
double test_vect_exp (double* __restrict__ A, double* __restrict__ B, int size
)
{
int i;
/* Element-wise exp() loop; the exp() call here is the one the report
   expects to be turned into a vector math library call. */
for (i = 0; i < size; i++)
A[i] = exp(B[i]);
/* NOTE(review): A[0] is read unconditionally, including when size <= 0 and
   the loop above has not written it. */
return A[0];
}
With GCC 5.4.0, `gcc-5.4.0/bin/gcc -O3 -mveclibabi=acml -ffast-math exp.c -S`
generates vector math calls to amdlibm/Intel SVML (here, __vrd2_exp):
---Snip---
.L8:
movapd (%r12), %xmm0
addl $1, %r15d
addq $16, %r12
addq $16, %rbx
call __vrd2_exp
movups %xmm0, -16(%rbx)
cmpl %r15d, 4(%rsp)
ja .L8
movl 12(%rsp), %eax
addl %eax, %ebp
cmpl %eax, 8(%rsp)
je .L10
---Snip--
From GCC 6.0 onwards, we no longer generate calls to ACML/SVML by default.
Instead, we generate a call to the glibc vector math library (libmvec):
---Snip---
.L8:
movapd (%r12), %xmm0
addl $1, %r15d
addq $16, %r12
addq $16, %rbx
call _ZGVbN2v___exp_finite
movups %xmm0, -16(%rbx)
cmpl %r15d, 4(%rsp)
ja .L8
movl 12(%rsp), %eax
addl %eax, %ebp
cmpl %eax, 8(%rsp)
je .L10
---Snip---