https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112431

--- Comment #8 from GCC Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Pan Li <pa...@gcc.gnu.org>:

https://gcc.gnu.org/g:303195e2a6b6f0e8f42e0578b61f9f37c6250beb

commit r14-6008-g303195e2a6b6f0e8f42e0578b61f9f37c6250beb
Author: Juzhe-Zhong <juzhe.zh...@rivai.ai>
Date:   Thu Nov 30 20:08:43 2023 +0800

    RISC-V: Support widening register overlap for vf4/vf8

    size_t
    foo (char const *buf, size_t len)
    {
      size_t sum = 0;
      size_t vl = __riscv_vsetvlmax_e8m8 ();
      size_t step = vl * 4;
      const char *it = buf, *end = buf + len;
      for (; it + step <= end;)
        {
          vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
          it += vl;
          vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
          it += vl;
          vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
          it += vl;
          vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
          it += vl;

          asm volatile("nop" ::: "memory");
          vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
          vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
          vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
          vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);

          asm volatile("nop" ::: "memory");
          size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
          size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
          size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
          size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);

          sum += sumation (sum0, sum1, sum2, sum3);
        }
      return sum;
    }

    Before this patch:

            add     a3,s0,s1
            add     a4,s6,s1
            add     a5,s7,s1
            vsetvli zero,s0,e64,m8,ta,ma
            vle8.v  v4,0(s1)
            vle8.v  v3,0(a3)
            mv      s1,s2
            vle8.v  v2,0(a4)
            vle8.v  v1,0(a5)
            nop
            vsext.vf8       v8,v4
            vsext.vf8       v16,v2
            vs8r.v  v8,0(sp)
            vsext.vf8       v24,v1
            vsext.vf8       v8,v3
            nop
            vmv.x.s a1,v8
            vl8re64.v       v8,0(sp)
            vmv.x.s a3,v24
            vmv.x.s a2,v16
            vmv.x.s a0,v8
            add     s2,s2,s5
            call    sumation
            add     s3,s3,a0
            bgeu    s4,s2,.L5

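    After this patch (each vsext.vf8 source now overlaps the highest-numbered
    register of its destination group, so the vs8r.v/vl8re64.v spill and
    reload are gone):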

            add     a3,s0,s1
            add     a4,s6,s1
            add     a5,s7,s1
            vsetvli zero,s0,e64,m8,ta,ma
            vle8.v  v15,0(s1)
            vle8.v  v23,0(a3)
            mv      s1,s2
            vle8.v  v31,0(a4)
            vle8.v  v7,0(a5)
            vsext.vf8       v8,v15
            vsext.vf8       v16,v23
            vsext.vf8       v24,v31
            vsext.vf8       v0,v7
            vmv.x.s a3,v0
            vmv.x.s a2,v24
            vmv.x.s a1,v16
            vmv.x.s a0,v8
            add     s2,s2,s5
            call    sumation
            add     s3,s3,a0
            bgeu    s4,s2,.L5

            PR target/112431

    gcc/ChangeLog:

            * config/riscv/vector.md: Add widening overlap of vf4/vf8.

    gcc/testsuite/ChangeLog:

            * gcc.target/riscv/rvv/base/pr112431-16.c: New test.
            * gcc.target/riscv/rvv/base/pr112431-17.c: New test.
            * gcc.target/riscv/rvv/base/pr112431-18.c: New test.

Reply via email to