https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68793

--- Comment #7 from ktkachov at gcc dot gnu.org ---
(In reply to Allan Jensen from comment #6)
> I mean the neon64 case, not 32-bit.

Seems so. I get:
// Compiler output quoted in the bug comment for the symbol
// _Z16RGBA2BGRA_neon64PKjPjj, which demangles to
// RGBA2BGRA_neon64(unsigned int const*, unsigned int*, unsigned int).
// Register use as seen in this listing (AAPCS64 argument order):
//   x0 = first argument; only ever loaded from (source words)
//   x1 = second argument; only ever stored to (destination words)
//   w2 = third argument; the 32-bit word count the index is compared with
//   w5 = running word index
//   w7 = w2 - 7, the bound of the 8-words-per-iteration loop
// Layout: .L8 processes 8 words per iteration with ld4/st4, then .L7
// processes the remaining words one at a time.
// NOTE(review): .cfi_startproc has no matching .cfi_endproc below, so the
// listing looks like an excerpt rather than a complete assembler unit.
_Z16RGBA2BGRA_neon64PKjPjj:
.LFB3215:
        .cfi_startproc
        subs    w7, w2, #7              // w7 = w2 - 7; flags feed the beq below
        mov     w5, 0                   // word index starts at 0
        beq     .L4                     // vector loop skipped only when w2 == 7 (w7 == 0)
        // NOTE(review): for w2 < 7 the subtraction wraps to a large unsigned
        // value and the loop below is still entered; presumably this mirrors
        // an unsigned `i < n - 7` bound in the C source - confirm.
        .p2align 2
.L8:
        ubfiz   x3, x5, 2, 32           // x3 = zero-extended w5 << 2 = byte offset of word w5
        add     w5, w5, 8               // advance the index by 8 words (32 bytes)
        add     x4, x0, x3              // x4 = source address of this group
        add     x3, x1, x3              // x3 = destination address of this group
        cmp     w5, w7                  // flags used by bcc at the loop end; nothing between rewrites them
        ld4     {v4.8b - v7.8b}, [x4]   // de-interleaving load: byte k of each 4-byte element lands in v(4+k)
        mov     v0.8b, v6.8b            // output byte lane 0 <- input byte lane 2
        mov     v1.8b, v5.8b            // byte lane 1 unchanged
        mov     v2.8b, v4.8b            // output byte lane 2 <- input byte lane 0
        mov     v3.8b, v7.8b            // byte lane 3 unchanged
        st4     {v0.8b - v3.8b}, [x3]   // re-interleave the four lanes and store 32 bytes
        bcc     .L8                     // repeat while w5 < w7 (unsigned)
.L4:
        cmp     w5, w2
        bcs     .L10                    // no words left when w5 >= w2 (unsigned)
        uxtw    x3, w5                  // x3 = index zero-extended to 64 bits
        sub     w2, w2, #1
        sub     w2, w2, w5              // w2 = count - 1 - index
        add     x5, x3, 1               // x5 = index + 1
        add     x5, x2, x5              // x5 = (count - 1 - index) + (index + 1): one-past-the-last word index
        lsl     x2, x3, 2               // x2 = byte offset of the first remaining word
        lsl     x5, x5, 2               // x5 = byte offset at which the scalar loop stops
        .p2align 2
.L7:
        ldr     w3, [x0, x2]            // load the source word
        and     w4, w3, 16711935        // keep bytes 0 and 2 (mask 0x00FF00FF)
        str     w4, [x1, x2]            // intermediate result is written to the destination
        // NOTE(review): the source word is loaded a second time after the
        // store above; presumably the compiler could not rule out x0 and x1
        // aliasing - confirm against the C source.
        ldr     w3, [x0, x2]
        and     w3, w3, -16711936       // keep bytes 1 and 3 (mask 0xFF00FF00)
        orr     w3, w4, w3, ror (32 - 16) // w3 = w4 | (w3 rotated right by 16): bytes 1 and 3 trade places
        str     w3, [x1, x2]            // final word overwrites the intermediate store
        // NOTE(review): this tail exchanges bytes 1 and 3 of the register value,
        // whereas the ld4/st4 loop exchanges memory byte lanes 0 and 2; whether
        // the two agree depends on the C source and data layout - confirm.
        add     x2, x2, 4               // next word
        cmp     x2, x5
        bne     .L7                     // loop until the stop offset is reached
        ret
.L10:
        ret

Reply via email to