https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92655
Bug ID: 92655
Summary: Suboptimal vectorization of variable shift
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: rguenth at gcc dot gnu.org
Target Milestone: ---
For the following testcase vect_recog_vector_vector_shift_pattern isn't able
to fully elide the use of 'int' and thus we fall back to vectorization with
SSE instead of AVX with -O3 -march=core-avx2
#define MULSEQ {32,34,35,38}
#define STRIDE 4
#define M61 2305843009213693951ULL
#define BITS 61
typedef unsigned long uint64_t;
typedef struct myvec_t { uint64_t __attribute__ ((aligned (32))) val[STRIDE]; }
__attribute__ ((aligned (32))) myvec_t;
/* Multiply k by 2^m modulo the Mersenne prime M61 = 2^61 - 1, expressed as a
   61-bit rotation: the low part keeps the bits that stay below bit 61, and the
   high part re-inserts the bits shifted out at the bottom.
   Assumes 0 < m <= BITS so neither shift count is out of range -- the callers
   in this testcase only pass the MULSEQ constants {32,34,35,38}.  */
inline uint64_t MULWU(uint64_t k, uint64_t m)
{
  uint64_t low_bits  = (k << m) & M61;       /* (k * 2^m) truncated to 61 bits */
  uint64_t wrapped   = k >> (BITS - m);      /* bits pushed past bit 60 wrap around */
  return low_bits + wrapped;
}
/* Vector overload: apply the scalar MULWU lane-wise, multiplying lane i by
   2^MULSEQ[i] modulo 2^61 - 1.  The shift counts come from the compile-time
   MULSEQ table; all locals carry the same 32-byte alignment as myvec_t.  */
myvec_t MULWU(myvec_t x)
{
  myvec_t __attribute__ ((aligned (32))) shifts = MULSEQ;
  myvec_t __attribute__ ((aligned (32))) result;
  for (int i = 0; i < STRIDE; ++i)
    result.val[i] = MULWU(x.val[i], shifts.val[i]);
  return result;
}