Committed, thanks Richard and Juzhe.

On 2023/9/8 16:57, 钟居哲 wrote:
Thanks Richard.
LGTM again from RISC-V side :).

------------------------------------------------------------------------
juzhe.zh...@rivai.ai

    *From:* Richard Sandiford <mailto:richard.sandif...@arm.com>
    *Date:* 2023-09-08 16:56
    *To:* Lehua Ding <mailto:lehua.d...@rivai.ai>
    *CC:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>; juzhe.zhong
    <mailto:juzhe.zh...@rivai.ai>
    *Subject:* Re: [PATCH V3] Support folding min(poly,poly) to const
    Lehua Ding <lehua.d...@rivai.ai> writes:
     > V3 change: Address Richard's comments.
     >
     > Hi,
     >
     > This patch adds support for folding `MIN (poly, poly)` to
     > a constant. Consider the following C code:
     >
     > ```
     > void foo2 (int* restrict a, int* restrict b, int n)
     > {
     >     for (int i = 0; i < 3; i += 1)
     >       a[i] += b[i];
     > }
     > ```
     >
     > Before this patch:
     >
     > ```
     > void foo2 (int * restrict a, int * restrict b, int n)
     > {
     >   vector([4,4]) int vect__7.27;
     >   vector([4,4]) int vect__6.26;
     >   vector([4,4]) int vect__4.23;
     >   unsigned long _32;
     >
     >   <bb 2> [local count: 268435456]:
     >   _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>;
     >   vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0);
     >   vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0);
     >   vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
     >   .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0,
    vect__7.27_9); [tail call]
     >   return;
     >
     > }
     > ```
     >
     > After this patch:
     >
     > ```
     > void foo2 (int * restrict a, int * restrict b, int n)
     > {
     >   vector([4,4]) int vect__7.27;
     >   vector([4,4]) int vect__6.26;
     >   vector([4,4]) int vect__4.23;
     >
     >   <bb 2> [local count: 268435456]:
     >   vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0);
     >   vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0);
     >   vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
     >   .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0,
    vect__7.27_9); [tail call]
     >   return;
     >
     > }
     > ```
     >
     > For RISC-V RVV, the csrr and branch instructions can be eliminated:
     >
     > Before this patch:
     >
     > ```
     > foo2:
     >         csrr    a4,vlenb
     >         srli    a4,a4,2
     >         li      a5,3
     >         bleu    a5,a4,.L5
     >         mv      a5,a4
     > .L5:
     >         vsetvli zero,a5,e32,m1,ta,ma
     >         ...
     > ```
     >
     > After this patch:
     >
     > ```
     > foo2:
     > vsetivli zero,3,e32,m1,ta,ma
     >         ...
     > ```
     >
     > Best,
     > Lehua
     >
     > gcc/ChangeLog:
     >
     > * fold-const.cc (can_min_p): New function.
     > (poly_int_binop): Try fold MIN_EXPR.
    OK, thanks.
    Richard
     > gcc/testsuite/ChangeLog:
     >
     > * gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust.
     > * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust.
     > * gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test.
     >
     > ---
     >  gcc/fold-const.cc                             | 24
    +++++++++++++++++++
     >  .../riscv/rvv/autovec/fold-min-poly.c         | 24
    +++++++++++++++++++
     >  .../gcc.target/riscv/rvv/autovec/vls/div-1.c  |  2 +-
     >  .../riscv/rvv/autovec/vls/shift-3.c           |  2 +-
     >  4 files changed, 50 insertions(+), 2 deletions(-)
     >  create mode 100644
    gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
     >
     > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
     > index 1da498a3152..d19b4666c65 100644
     > --- a/gcc/fold-const.cc
     > +++ b/gcc/fold-const.cc
     > @@ -1213,6 +1213,25 @@ wide_int_binop (wide_int &res,
     >    return true;
     >  }
     >
     > +/* Returns true if we know who is smaller or equal, ARG1 or
    ARG2, and set the
     > +   min value to RES.  */
     > +bool
     > +can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res)
     > +{
     > +  if (known_le (wi::to_poly_widest (arg1), wi::to_poly_widest
    (arg2)))
     > +    {
     > +      res = wi::to_poly_wide (arg1);
     > +      return true;
     > +    }
     > +  else if (known_le (wi::to_poly_widest (arg2),
    wi::to_poly_widest (arg1)))
     > +    {
     > +      res = wi::to_poly_wide (arg2);
     > +      return true;
     > +    }
     > +
     > +  return false;
     > +}
     > +
     >  /* Combine two poly int's ARG1 and ARG2 under operation CODE to
     >     produce a new constant in RES.  Return FALSE if we don't know how
     >     to evaluate CODE at compile-time.  */
     > @@ -1261,6 +1280,11 @@ poly_int_binop (poly_wide_int &res, enum
    tree_code code,
     >  return false;
     >        break;
     >
     > +    case MIN_EXPR:
     > +      if (!can_min_p (arg1, arg2, res))
     > + return false;
     > +      break;
     > +
     >      default:
     >        return false;
     >      }
     > diff --git
    a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
    b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
     > new file mode 100644
     > index 00000000000..de4c472c76e
     > --- /dev/null
     > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
     > @@ -0,0 +1,24 @@
     > +/* { dg-do compile } */
     > +/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param
    riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1
    -fno-vect-cost-model" } */
     > +
     > +void foo1 (int* restrict a, int* restrict b, int n)
     > +{
     > +    for (int i = 0; i < 4; i += 1)
     > +      a[i] += b[i];
     > +}
     > +
     > +void foo2 (int* restrict a, int* restrict b, int n)
     > +{
     > +    for (int i = 0; i < 3; i += 1)
     > +      a[i] += b[i];
     > +}
     > +
     > +void foo3 (int* restrict a, int* restrict b, int n)
     > +{
     > +    for (int i = 0; i < 5; i += 1)
     > +      a[i] += b[i];
     > +}
     > +
     > +/* { dg-final { scan-assembler-not {\tcsrr\t} } } */
     > +/* { dg-final { scan-assembler
    {\tvsetivli\tzero,4,e32,m1,t[au],m[au]} } } */
     > +/* { dg-final { scan-assembler
    {\tvsetivli\tzero,3,e32,m1,t[au],m[au]} } } */
     > diff --git
    a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c
    b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c
     > index f3388a86e38..40224c69458 100644
     > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c
     > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c
     > @@ -55,4 +55,4 @@ DEF_OP_VV (div, 512, int64_t, /)
     >
     >  /* { dg-final { scan-assembler-times
    {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */
     >  /* TODO: Ideally, we should make sure there is no "csrr vlenb".
    However, we still have 'csrr vlenb' for some cases since we don't
    support VLS mode conversion which are needed by division.  */
     > -/* { dg-final { scan-assembler-times {csrr} 19 } } */
     > +/* { dg-final { scan-assembler-not {csrr} } } */
     > diff --git
    a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
    b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
     > index 98822b15657..b34a349949b 100644
     > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
     > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
     > @@ -55,4 +55,4 @@ DEF_OP_VV (shift, 512, int64_t, <<)
     >
     >  /* { dg-final { scan-assembler-times
    {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 41 } } */
     >  /* TODO: Ideally, we should make sure there is no "csrr vlenb".
    However, we still have 'csrr vlenb' for some cases since we don't
    support VLS mode conversion which are needed by division.  */
     > -/* { dg-final { scan-assembler-times {csrr} 18 } } */
     > +/* { dg-final { scan-assembler-not {csrr} } } */


--
Best,
Lehua

Reply via email to