https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106952

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
With the proposed patches for PR88540 and PR105715 applied, I get the following with -O3 -msse4.1:

# intersection -- compiler-generated x86-64 code (AT&T operand order: op src, dst).
#
# Reads six scalar floats from each of the two pointers in %rdi and %rsi
# (byte offsets 0, 4, 8, 12, 16, 20) and writes a 0/1 result to %al.
# For each of three index pairs it forms two products
#     ([rsi+k] - [rdi+j]) * [rdi+12+j]
# and folds them into two running values via minss/maxss; %al is set when the
# final "hi" value compares greater than the final "lo" value.
#
# NOTE(review): "lo"/"hi" are labels chosen for these comments, not names from
# the source.  The shape looks like a ray/box slab test -- confirm against the
# testcase attached to the PR.
# In the comments, min(a, b) / max(a, b) mean minss/maxss with a = destination
# operand and b = source operand (the order matters for NaN inputs).
intersection:
.LFB2:
        .cfi_startproc
        movss   .LC0(%rip), %xmm5       # xmm5 = constant .LC0 (its value is not shown in this listing)
        pxor    %xmm2, %xmm2            # xmm2 = 0.0
        movss   (%rdi), %xmm4           # xmm4 = [rdi+0]
        movss   12(%rsi), %xmm1         # xmm1 = [rsi+12]
        movss   12(%rdi), %xmm0         # xmm0 = [rdi+12]
        divss   %xmm2, %xmm5            # xmm5 = .LC0 / 0.0, evaluated at run time (presumably +Inf -- TODO confirm .LC0)
        movss   (%rsi), %xmm3           # xmm3 = [rsi+0]
        subss   %xmm4, %xmm1            # xmm1 = [rsi+12] - [rdi+0]
        subss   %xmm4, %xmm3            # xmm3 = [rsi+0]  - [rdi+0]
        pxor    %xmm4, %xmm4            # xmm4 = 0.0, the "else" value for the first blend
        mulss   %xmm0, %xmm1            # p1 = xmm1 * [rdi+12]
        mulss   %xmm0, %xmm3            # p0 = xmm3 * [rdi+12]
        # Select "p < 0.0 ? 0.0 : p" for p1 and p0 using compare + blend rather
        # than a single maxss.
        movaps  %xmm1, %xmm0
        cmpnltss        %xmm2, %xmm0    # xmm0 = mask of !(p1 < 0.0)
        blendvps        %xmm0, %xmm1, %xmm4     # xmm4 = mask ? p1 : 0.0
        movaps  %xmm3, %xmm0
        cmpnltss        %xmm2, %xmm0    # xmm0 = mask of !(p0 < 0.0)
        pxor    %xmm2, %xmm2            # re-zero xmm2 as the "else" value for the second blend
        blendvps        %xmm0, %xmm3, %xmm2     # xmm2 = mask ? p0 : 0.0
        movss   16(%rsi), %xmm0         # xmm0 = [rsi+16]
        minss   %xmm5, %xmm3            # xmm3 = min(p0, .LC0/0.0)
        minss   %xmm5, %xmm1            # xmm1 = min(p1, .LC0/0.0)
        movss   4(%rdi), %xmm5          # xmm5 = [rdi+4]
        minss   %xmm4, %xmm2            # lo0 = min(select(p0), select(p1))
        movss   16(%rdi), %xmm4         # xmm4 = [rdi+16]
        subss   %xmm5, %xmm0            # xmm0 = [rsi+16] - [rdi+4]
        maxss   %xmm3, %xmm1            # hi0 = max(min(p1, ..), min(p0, ..))
        movss   4(%rsi), %xmm3          # xmm3 = [rsi+4]
        subss   %xmm5, %xmm3            # xmm3 = [rsi+4] - [rdi+4]
        mulss   %xmm4, %xmm0            # q1 = xmm0 * [rdi+16]
        movss   8(%rdi), %xmm5          # xmm5 = [rdi+8]
        mulss   %xmm4, %xmm3            # q0 = xmm3 * [rdi+16]
        movaps  %xmm2, %xmm4            # xmm4 = copy of lo0
        maxss   %xmm0, %xmm4            # xmm4 = max(lo0, q1)
        minss   %xmm1, %xmm0            # xmm0 = min(q1, hi0)
        maxss   %xmm3, %xmm2            # xmm2 = max(lo0, q0)
        minss   %xmm1, %xmm3            # xmm3 = min(q0, hi0)
        movss   8(%rsi), %xmm1          # xmm1 = [rsi+8]
        subss   %xmm5, %xmm1            # xmm1 = [rsi+8] - [rdi+8]
        maxss   %xmm3, %xmm0            # hi1 = max(min(q1, hi0), min(q0, hi0))
        movss   20(%rsi), %xmm3         # xmm3 = [rsi+20]
        minss   %xmm4, %xmm2            # lo1 = min(max(lo0, q0), max(lo0, q1))
        movss   20(%rdi), %xmm4         # xmm4 = [rdi+20]
        subss   %xmm5, %xmm3            # xmm3 = [rsi+20] - [rdi+8]
        mulss   %xmm4, %xmm1            # r0 = xmm1 * [rdi+20]
        movaps  %xmm2, %xmm5            # xmm5 = copy of lo1
        mulss   %xmm4, %xmm3            # r1 = xmm3 * [rdi+20]
        movaps  %xmm2, %xmm4            # xmm4 = copy of lo1
        maxss   %xmm1, %xmm4            # xmm4 = max(lo1, r0)
        minss   %xmm0, %xmm1            # xmm1 = min(r0, hi1)
        movaps  %xmm3, %xmm2            # xmm2 = copy of r1
        maxss   %xmm3, %xmm5            # xmm5 = max(lo1, r1)
        minss   %xmm0, %xmm2            # xmm2 = min(r1, hi1)
        minss   %xmm5, %xmm4            # lo2 = min(max(lo1, r0), max(lo1, r1))
        maxss   %xmm1, %xmm2            # hi2 = max(min(r1, hi1), min(r0, hi1))
        comiss  %xmm4, %xmm2            # compare hi2 (xmm2) against lo2 (xmm4)
        seta    %al                     # al = 1 iff hi2 > lo2 (0 if equal, less, or unordered); only %al is written
        ret

There's the existing issue that RTL conditional-move expansion doesn't
preserve the equality of the two 0.0 constants for

  _33 = t2_34 < 0.0;
  _12 = _33 ? 0.0 : t2_34;

but instead emits two loads from the constant pool for 0.0 here, which then
fail to be recognized as min/max in the x86 backend.

Reply via email to