https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118466

--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
Hmm, early-break vectorization should handle this in theory, and indeed it does:

> ./cc1 -quiet t4.c -O3 -msse4 -fopt-info-vec -fdump-tree-optimized
t4.c:3:24: optimized: loop vectorized using 16 byte vectors
t4.c:3:24: optimized:  loop versioned for vectorization because of possible aliasing

but the generated code is awful.

.L10:
        movdqa  (%r9,%rax), %xmm0
        addq    $1, %rdx
        paddq   %xmm7, %xmm2
        movups  %xmm0, (%rcx,%rax)
        addq    $16, %rax
        cmpq    %rdx, %r10
        je      .L29
.L3:    
        movdqa  %xmm2, %xmm0
        movdqa  %xmm2, %xmm1
        paddq   %xmm6, %xmm0
        paddq   %xmm5, %xmm1
        pcmpeqq %xmm4, %xmm0
        pcmpeqq %xmm4, %xmm1
        pcmpeqq %xmm3, %xmm0
        pcmpeqq %xmm3, %xmm1
        pand    %xmm1, %xmm0
        pxor    %xmm8, %xmm0
        ptest   %xmm0, %xmm0
        je      .L10

  # vect_vec_iv_.16_83 = PHI <_84(8), _85(23)>
  _84 = vect_vec_iv_.16_83 + { 4, 4 };
  _86 = vect_vec_iv_.16_83 + { 2, 2 };
  vect_i_12.17_89 = vect_vec_iv_.16_83 + { 1, 1 };
  vect_i_12.17_90 = _86 + { 1, 1 };
  mask_patt_17.18_93 = vect_i_12.17_89 != _91;
  mask_patt_17.18_94 = vect_i_12.17_90 != _91;
  vexit_reduc_95 = mask_patt_17.18_94 & mask_patt_17.18_93;
  if (vexit_reduc_95 == { -1, -1 })
    goto <bb 4>; [89.00%]

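Huh -- the exit test is just two vector != compares ANDed together, yet it
expands to four pcmpeqq plus pand/pxor/ptest in the loop.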

Reply via email to