https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118466
--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
Hmm, early-break vectorization should handle this in theory. And it does:
> ./cc1 -quiet t4.c -O3 -msse4 -fopt-info-vec -fdump-tree-optimized
t4.c:3:24: optimized: loop vectorized using 16 byte vectors
t4.c:3:24: optimized: loop versioned for vectorization because of possible aliasing
But the generated code is awful:
.L10:
movdqa (%r9,%rax), %xmm0
addq $1, %rdx
paddq %xmm7, %xmm2
movups %xmm0, (%rcx,%rax)
addq $16, %rax
cmpq %rdx, %r10
je .L29
.L3:
movdqa %xmm2, %xmm0
movdqa %xmm2, %xmm1
paddq %xmm6, %xmm0
paddq %xmm5, %xmm1
pcmpeqq %xmm4, %xmm0
pcmpeqq %xmm4, %xmm1
pcmpeqq %xmm3, %xmm0
pcmpeqq %xmm3, %xmm1
pand %xmm1, %xmm0
pxor %xmm8, %xmm0
ptest %xmm0, %xmm0
je .L10
The corresponding GIMPLE for the vectorized exit test:
# vect_vec_iv_.16_83 = PHI <_84(8), _85(23)>
_84 = vect_vec_iv_.16_83 + { 4, 4 };
_86 = vect_vec_iv_.16_83 + { 2, 2 };
vect_i_12.17_89 = vect_vec_iv_.16_83 + { 1, 1 };
vect_i_12.17_90 = _86 + { 1, 1 };
mask_patt_17.18_93 = vect_i_12.17_89 != _91;
mask_patt_17.18_94 = vect_i_12.17_90 != _91;
vexit_reduc_95 = mask_patt_17.18_94 & mask_patt_17.18_93;
if (vexit_reduc_95 == { -1, -1 })
goto <bb 4>; [89.00%]
huh.