https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
The recent patch improved this to avoid some of the compares. We still have the three-argument PHI and thus three VEC_CONDs.

.L10:
        vmovups (%rdi,%rdx), %ymm0
        vcmpltps        %ymm6, %ymm0, %ymm3
        vcmpltps        %ymm2, %ymm0, %ymm1
        vpandn  %ymm1, %ymm3, %ymm1
        vblendvps       %ymm1, %ymm5, %ymm4, %ymm1
        vblendvps       %ymm3, %ymm7, %ymm1, %ymm1
        vaddps  %ymm1, %ymm0, %ymm0
        vaddps  (%rax,%rdx), %ymm0, %ymm0
        vmovups %ymm0, (%rax,%rdx)
        addq    $32, %rdx
        cmpq    $1024, %rdx
        jne     .L10

vs. GCC 12

.L6:
        vmovups (%rdi,%rdx), %ymm1
        vcmpltps        %ymm5, %ymm1, %ymm0
        vcmpltps        %ymm6, %ymm1, %ymm4
        vblendvps       %ymm0, %ymm3, %ymm2, %ymm0
        vandps  %ymm3, %ymm4, %ymm4
        vaddps  %ymm4, %ymm0, %ymm0
        vaddps  %ymm1, %ymm0, %ymm0
        vaddps  (%rax,%rdx), %ymm0, %ymm0
        vmovups %ymm0, (%rax,%rdx)
        addq    $32, %rdx
        cmpq    $1024, %rdx
        jne     .L6

which at least overall looks comparable.