https://gcc.gnu.org/bugzilla/show_bug.cgi?id=34195

--- Comment #6 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
On AArch64 (compiled with -fno-vect-cost-model, because I used ThunderX as my CPU), I get:
.L4:
        ldr     q0, [x6, x9]
        add     w7, w7, 1
        ldr     q2, [x6, x10]
        add     x6, x6, 16
        fadd    v3.4s, v0.4s, v2.4s
        fsub    v2.4s, v2.4s, v0.4s
        fadd    v1.4s, v3.4s, v4.4s
        fadd    v0.4s, v2.4s, v4.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v1.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v6.4s, v1.4s, v3.4s
        fadd    v7.4s, v0.4s, v2.4s
        st2     {v6.4s - v7.4s}, [x8], 32
        cmp     w7, w2
        bcc     .L4


This may not be the same as what you expected.

Reply via email to