https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112401

--- Comment #1 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
GCC ASM:

# subreg_to_reg_1 (GCC output, RISC-V vector extension)
#
# Loads 32-bit elements from the address in a0 into the LMUL=8 register
# group v16-v23, copies each register of that group into v8..v1, adds each
# of those registers to itself once per loop iteration (the loop is skipped
# when a2 is zero), then stores the eight registers to consecutive 16-byte
# slots starting at the address in a1.
#
# Registers as used here:
#   a0 - address the vector is loaded from
#   a1 - base address for the stores (overwritten before the last store)
#   a2 - iteration count compared against the counter in a5
#   a5 - scratch: AVL, loop counter, then store addresses
# NOTE(review): the 16-byte spacing between whole-register stores matches a
# register size of 128 bits (VLEN=128) - presumably the compile target; confirm.
subreg_to_reg_1:
        li      a5,32                    # AVL = 32 elements
        vsetvli zero,a5,e32,m8,ta,ma     # SEW=32, LMUL=8 for the load
        vle32.v v16,0(a0)                # load into group v16-v23
        # Whole-register copies: v16..v23 -> v8..v1 (descending destinations).
        vmv1r.v v8,v16
        vmv1r.v v7,v17
        vmv1r.v v6,v18
        vmv1r.v v5,v19
        vmv1r.v v4,v20
        vmv1r.v v3,v21
        vmv1r.v v2,v22
        vmv1r.v v1,v23
        beq     a2,zero,.L2              # a2 == 0: skip the loop, go store
        li      a5,0                     # loop counter = 0
        vsetivli        zero,4,e32,m1,ta,ma   # switch to vl=4, SEW=32, LMUL=1 (set once, outside the loop)
.L3:
        addi    a5,a5,1                  # counter++
        # Each single register is added to itself (vN = vN + vN).
        vadd.vv v8,v8,v8
        vadd.vv v7,v7,v7
        vadd.vv v6,v6,v6
        vadd.vv v5,v5,v5
        vadd.vv v4,v4,v4
        vadd.vv v3,v3,v3
        vadd.vv v2,v2,v2
        vadd.vv v1,v1,v1
        bne     a2,a5,.L3                # repeat until counter == a2
.L2:
        # Whole-register stores of v8..v1 to a1+0, +16, ..., +112.
        vs1r.v  v8,0(a1)
        addi    a5,a1,16
        vs1r.v  v7,0(a5)
        addi    a5,a1,32
        vs1r.v  v6,0(a5)
        addi    a5,a1,48
        vs1r.v  v5,0(a5)
        addi    a5,a1,64
        vs1r.v  v4,0(a5)
        addi    a5,a1,80
        vs1r.v  v3,0(a5)
        addi    a5,a1,96
        vs1r.v  v2,0(a5)
        addi    a1,a1,112                # last slot: a1 itself is advanced
        vs1r.v  v1,0(a1)
        ret

LLVM ASM:

# subreg_to_reg_1 (LLVM output, RISC-V vector extension)
#
# Loads 32-bit elements from the address in a0 into the LMUL=8 register
# group v8-v15, adds each register of that group to itself once per loop
# iteration (the loop is skipped when a2 is zero), then stores the eight
# registers to consecutive 16-byte slots starting at the address in a1.
# The loaded registers are operated on in place; this listing contains no
# vmv1r.v copies.
#
# Registers as used here:
#   a0 - load address, then reused as store base + 16
#   a1 - base address for the first store, then scratch store address
#   a2 - iteration count, decremented to zero by the loop
#   a3 - scratch: AVL
# NOTE(review): the 16-byte spacing between whole-register stores matches a
# register size of 128 bits (VLEN=128) - presumably the compile target; confirm.
subreg_to_reg_1:                        # @subreg_to_reg_1
        li      a3, 32                   # AVL = 32 elements
        vsetvli zero, a3, e32, m8, ta, ma   # SEW=32, LMUL=8 for the load
        vle32.v v8, (a0)                 # load into group v8-v15
        addi    a0, a1, 16               # a0 = store base + 16, used after the loop
        beqz    a2, .LBB0_2              # a2 == 0: skip the loop, go store
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        vsetivli        zero, 4, e32, m1, ta, ma   # vl=4, SEW=32, LMUL=1 (re-executed every iteration)
        # Each single register is added to itself (vN = vN + vN).
        vadd.vv v8, v8, v8
        vadd.vv v9, v9, v9
        vadd.vv v10, v10, v10
        vadd.vv v11, v11, v11
        vadd.vv v12, v12, v12
        vadd.vv v13, v13, v13
        vadd.vv v14, v14, v14
        addi    a2, a2, -1               # count down the remaining iterations
        vadd.vv v15, v15, v15
        bnez    a2, .LBB0_1              # repeat until a2 reaches zero
.LBB0_2:
        # Whole-register stores of v8..v15 to a1+0, then a0+0, +16, ..., +96
        # (a0 holds the original a1 + 16).
        vs1r.v  v8, (a1)
        vs1r.v  v9, (a0)
        addi    a1, a0, 16
        vs1r.v  v10, (a1)
        addi    a1, a0, 32
        vs1r.v  v11, (a1)
        addi    a1, a0, 48
        vs1r.v  v12, (a1)
        addi    a1, a0, 64
        vs1r.v  v13, (a1)
        addi    a1, a0, 80
        vs1r.v  v14, (a1)
        addi    a0, a0, 96               # last slot: a0 itself is advanced
        vs1r.v  v15, (a0)
        ret

Reply via email to