LGTM :)

On Thu, Feb 1, 2024 at 11:46 PM Juzhe-Zhong <juzhe.zh...@rivai.ai> wrote:
>
> Noticed the following in a recent benchmark evaluation (coremark-pro zip-test):
>
>         vid.v   v2
>         vmv.v.i v5,0
> .L9:
>         vle16.v v3,0(a4)
>         vrsub.vx        v4,v2,a6   ---> LICM failed to hoist it outside the loop.
>
> The root cause is:
>
> (insn 56 47 57 4 (set (subreg:DI (reg:HI 220) 0)
>         (reg:DI 223)) "rvv.c":11:9 208 {*movdi_64bit}  -> Its result is used by
> the following vrsub.vx, which suppresses the hoisting of that vrsub.vx
>      (nil))
>
> (insn 57 56 59 4 (set (reg:RVVMF2HI 216)
>         (if_then_else:RVVMF2HI (unspec:RVVMF32BI [
>                     (const_vector:RVVMF32BI repeat [
>                             (const_int 1 [0x1])
>                         ])
>                     (reg:DI 350)
>                     (const_int 2 [0x2]) repeated x2
>                     (const_int 1 [0x1])
>                     (reg:SI 66 vl)
>                     (reg:SI 67 vtype)
>                 ] UNSPEC_VPREDICATE)
>             (minus:RVVMF2HI (vec_duplicate:RVVMF2HI (reg:HI 220))
>                 (reg:RVVMF2HI 217))
>             (unspec:RVVMF2HI [
>                     (reg:DI 0 zero)
>                 ] UNSPEC_VUNDEF))) "rvv.c":11:9 6938 
> {pred_subrvvmf2hi_reverse_scalar}
>      (expr_list:REG_DEAD (reg:HI 220)
>         (nil)))
>
> This patch fixes it by generating (set (reg:HI) (subreg:HI (reg:DI))) instead of
> (set (subreg:DI (reg:HI)) (reg:DI)).
>
> After this patch:
>
>         vid.v   v2
>         vrsub.vx        v2,v2,a7
>         vmv.v.i v4,0
> .L3:
>         vle16.v v3,0(a4)
>
> Tested on both RV32 and RV64 with no regressions.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.cc (riscv_legitimize_move): Fix poly_int dest 
> generation.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/poly_licm-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/poly_licm-2.c: New test.
>
> ---
>  gcc/config/riscv/riscv.cc                     |  9 ++++---
>  .../riscv/rvv/autovec/poly_licm-1.c           | 18 +++++++++++++
>  .../riscv/rvv/autovec/poly_licm-2.c           | 27 +++++++++++++++++++
>  3 files changed, 50 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 529ef5e84b7..6e22b43e618 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -2711,16 +2711,17 @@ riscv_legitimize_move (machine_mode mode, rtx dest, 
> rtx src)
>                                     (const_poly_int:HI [m, n])
>                                     (const_poly_int:SI [m, n]).  */
>           rtx tmp = gen_reg_rtx (Pmode);
> -         riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
> -                                     src);
> +         rtx tmp2 = gen_reg_rtx (Pmode);
> +         riscv_legitimize_poly_move (Pmode, tmp2, tmp, src);
> +         emit_move_insn (dest, gen_lowpart (mode, tmp2));
>         }
>        else
>         {
>           /* In RV32 system, handle (const_poly_int:SI [m, n])
>                                     (const_poly_int:DI [m, n]).
>              In RV64 system, handle (const_poly_int:DI [m, n]).
> -       FIXME: Maybe we could gen SImode in RV32 and then sign-extend to 
> DImode,
> -       the offset should not exceed 4GiB in general.  */
> +            FIXME: Maybe we could gen SImode in RV32 and then sign-extend to
> +            DImode, the offset should not exceed 4GiB in general.  */
>           rtx tmp = gen_reg_rtx (mode);
>           riscv_legitimize_poly_move (mode, dest, tmp, src);
>         }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
> new file mode 100644
> index 00000000000..b7da65f0996
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns 
> -fno-schedule-insns2" } */
> +
> +extern int wsize;
> +
> +typedef unsigned short Posf;
> +#define NIL 0
> +
> +void foo (Posf *p)
> +{
> +  register unsigned n, m;
> +  do {
> +      m = *--p;
> +      *p = (Posf)(m >= wsize ? m-wsize : NIL);
> +  } while (--n);
> +}
> +
> +/* { dg-final { scan-assembler-times 
> {vid\.v\s+v[0-9]+\s+addi\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*-1\s+vrsub\.vx\s+} 1 
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
> new file mode 100644
> index 00000000000..ffb3c63149f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns 
> -fno-schedule-insns2" } */
> +
> +typedef unsigned short uint16_t;
> +
> +void AAA (uint16_t *x, uint16_t *y, unsigned wsize, unsigned count)
> +{
> +  unsigned m = 0, n = count;
> +  register uint16_t *p;
> +
> +  p = x;
> +
> +  do {
> +    m = *--p;
> +    *p = (uint16_t)(m >= wsize ? m-wsize : 0);
> +  } while (--n);
> +
> +  n = wsize;
> +  p = y;
> +
> +  do {
> +      m = *--p;
> +      *p = (uint16_t)(m >= wsize ? m-wsize : 0);
> +  } while (--n);
> +}
> +
> +/* { dg-final { scan-assembler-times {vid\.v\s+v[0-9]+\s+vrsub\.vx\s+} 2 } } 
> */
> --
> 2.36.1
>

Reply via email to