https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98934
--- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
This is really poor even with -mavx512f. We should be able to do it like this
(which is what LLVM does):
vpmovzxbd %xmm1, %zmm1
vpmovzxbd %xmm0, %zmm0
vpsravd %zmm1, %zmm0, %zmm0
vpmovdb %zmm0, %xmm0
Basically, zero-extend from char to int, then do the shift, and then
truncate back down to char.
We can emulate this in C with:
/* GCC/Clang vector-extension types; vector_size is given in bytes. */
typedef char v16i8 __attribute__((vector_size(16)));               /* 16 char lanes */
typedef int v16i32 __attribute__((vector_size(16 * sizeof(int)))); /* 16 int lanes  */
typedef int v4i32 __attribute__((vector_size(4 * sizeof(int))));   /*  4 int lanes  */
typedef char v4i8 __attribute__((vector_size(4)));                 /*  4 char lanes */

/*
 * Per-lane right shift of 16 char elements, done at int width.
 *
 * Each lane of x and y is converted to int, x is shifted right by the
 * matching lane of y, and the result is converted back to char.
 *
 * Note: the widening uses ordinary char -> int conversion, so whether a
 * lane is sign- or zero-extended follows the signedness of plain char on
 * the target.
 */
v16i8 f1(v16i8 x, v16i8 y)
{
    /* Widen both operands so the shift is performed on int lanes. */
    const v16i32 wide_value = __builtin_convertvector(x, v16i32);
    const v16i32 wide_count = __builtin_convertvector(y, v16i32);

    /* Shift lane-by-lane, then narrow each int lane back to char. */
    return __builtin_convertvector(wide_value >> wide_count, v16i8);
}