https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88492

ptomsich at gcc dot gnu.org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |ptomsich at gcc dot gnu.org

--- Comment #6 from ptomsich at gcc dot gnu.org ---
With current GCC master, the test case compiles to the following (with -mcpu=neoverse-n1):

        .arch armv8.2-a+crc+fp16+rcpc+dotprod+profile
        .file   "pr88492.c"
        .text
        .align  2
        .p2align 5,,15
        .global test_slp
        .type   test_slp, %function
// test_slp (GCC output): reduces the 16 bytes at [x0] to one 32-bit value
// returned in w0.
//   In:    x0 = address of a 16-byte input
//   Out:   w0 = result of the across-lane sum below
//   Uses:  x1, v0-v7, v16
// NOTE(review): the index vector at .LC0 is not part of this listing, so the
// exact effect of the tbl lookups cannot be stated from here.
test_slp:
.LFB0:
        .cfi_startproc
        // Load the 16 input bytes and the 16-byte tbl index vector from .LC0.
        ldr     q2, [x0]
        adrp    x1, .LC0
        ldr     q16, [x1, #:lo12:.LC0]
        // Zero-extend bytes to halfwords: v4 = bytes 0-7, v2 = bytes 8-15.
        uxtl    v4.8h, v2.8b
        uxtl2   v2.8h, v2.16b
        // Zero-extend halfwords to words:
        //   v0 = bytes 0-3, v4 = bytes 4-7, v6 = bytes 8-11, v2 = bytes 12-15.
        uxtl    v0.4s, v4.4h
        uxtl    v6.4s, v2.4h
        uxtl2   v4.4s, v4.8h
        uxtl2   v2.4s, v2.8h
        // Duplicate each vector into the next register so that each two-register
        // tbl below has a consecutive pair; both table halves hold the same data.
        mov     v1.16b, v0.16b
        mov     v7.16b, v6.16b
        mov     v5.16b, v4.16b
        mov     v3.16b, v2.16b
        // Byte-wise table lookup on each pair, all using the same indices (v16).
        // NOTE(review): if .LC0 only permutes whole 32-bit lanes, these lookups
        // cannot change the final addv sum -- confirm against the constant.
        tbl     v0.16b, {v0.16b - v1.16b}, v16.16b
        tbl     v6.16b, {v6.16b - v7.16b}, v16.16b
        tbl     v4.16b, {v4.16b - v5.16b}, v16.16b
        tbl     v2.16b, {v2.16b - v3.16b}, v16.16b
        // Lane-wise sum of the four word vectors into v0.
        add     v0.4s, v0.4s, v4.4s
        add     v6.4s, v6.4s, v2.4s
        add     v0.4s, v0.4s, v6.4s
        // Sum the four lanes into s0 and move the scalar to the return register.
        addv    s0, v0.4s
        fmov    w0, s0
        ret
        .cfi_endproc
.LFE0:
        .size   test_slp, .-test_slp

which contrasts with LLVM13 (with -mcpu=neoverse-n1):

test_slp:                               // @test_slp
// LLVM output using the dot-product instruction: returns in w0 the sum of the
// 16 unsigned bytes at [x0].
//   In:    x0 = address of a 16-byte input
//   Out:   w0 = sum of the bytes
//   Uses:  v0-v2
        .cfi_startproc
// %bb.0:                               // %entry
        // Load the 16 input bytes.
        ldr     q0, [x0]
        // v1 = sixteen bytes of 1 (the multiplier); v2 = zeroed accumulator.
        movi    v1.16b, #1
        movi    v2.2d, #0000000000000000
        // Each 32-bit lane of v2 accumulates the dot product of four input bytes
        // with four ones, i.e. the sum of that group of four bytes.
        udot    v2.4s, v0.16b, v1.16b
        // Sum the four lanes into s0 and move the scalar to the return register.
        addv    s0, v2.4s
        fmov    w0, s0
        ret
.Lfunc_end0:
        .size   test_slp, .Lfunc_end0-test_slp

or (LLVM13 without the -mcpu option):

        .type   test_slp,@function
// LLVM output without dot-product: returns in w0 the sum of the 16 unsigned
// bytes at [x0], computed with widening adds.
//   In:    x0 = address of a 16-byte input
//   Out:   w0 = sum of the bytes
//   Uses:  v0-v2
test_slp:                               // @test_slp
        .cfi_startproc
// %bb.0:                               // %entry
        // Load the 16 input bytes.
        ldr     q0, [x0]
        // Zero-extend to halfwords (shift by 0): v1 = bytes 8-15, v0 = bytes 0-7.
        ushll2  v1.8h, v0.16b, #0
        ushll   v0.8h, v0.8b, #0
        // Widening adds to 32-bit lanes: v2 = upper halves of v0 and v1,
        // v0 = lower halves of v0 and v1.
        uaddl2  v2.4s, v0.8h, v1.8h
        uaddl   v0.4s, v0.4h, v1.4h
        // Combine the two partial-sum vectors lane-wise.
        add     v0.4s, v0.4s, v2.4s
        // Sum the four lanes into s0 and move the scalar to the return register.
        addv    s0, v0.4s
        fmov    w0, s0
        ret
.Lfunc_end0:
        .size   test_slp, .Lfunc_end0-test_slp

Reply via email to