https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82147
/*
 * --- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
 * It is even worse for float*4->float*2,float*2.
 * Take (ignore the obvious aliasing issues):
 */

/*
 * Scalar form: de-interleave groups of four floats from c.
 *
 * For each i in [0, s), floats c[4i] and c[4i+1] are copied to
 * a[2i] and a[2i+1], and floats c[4i+2] and c[4i+3] are copied to
 * b[2i] and b[2i+1].  A non-positive s performs no work.
 */
void f(float *restrict a, float *restrict b, float *restrict c, int s)
{
    for (int i = 0; i < s; i++) {
        a[i * 2]     = c[i * 4];
        a[i * 2 + 1] = c[i * 4 + 1];
        b[i * 2]     = c[i * 4 + 2];
        b[i * 2 + 1] = c[i * 4 + 3];
    }
}

/* GCC vector extension: a 16-byte vector of the following element type. */
#define vector16 __attribute__((vector_size(16)))

/*
 * Hand-vectorized form of f(): one 16-byte load per iteration, split
 * into two 8-byte stores.
 *
 * Each iteration reads the four floats starting at c[4i] as a vector of
 * two doubles; the low lane (two floats) is stored at a[2i] and the high
 * lane (two floats) at b[2i].  The load index must be i*4, because a
 * 16-byte vector spans four floats; indexing with i*2 would read
 * overlapping windows and would not match f().
 *
 * The pointer casts deliberately reinterpret float storage as double
 * (the strict-aliasing issue mentioned in the text above) and rely on
 * c being suitably aligned for a 16-byte vector access and on a and b
 * being suitably aligned for a double access.
 */
void f1(float *restrict a, float *restrict b, float *restrict c, int s)
{
    for (int i = 0; i < s; i++) {
        vector16 double d = *(vector16 double *)&c[i * 4];

        *(double *)&a[i * 2] = d[0];
        *(double *)&b[i * 2] = d[1];
    }
}