Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

Uros Bizjak Wed, 05 Jun 2024 01:09:45 -0700

On Wed, Jun 5, 2024 at 9:38 AM Li, Pan2 <[email protected]> wrote:
>
> Thanks Richard, will commit after the rebased pass the regression test.
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <[email protected]>
> Sent: Wednesday, June 5, 2024 3:19 PM
> To: Li, Pan2 <[email protected]>
> Cc: [email protected]; [email protected]; [email protected]; 
> [email protected]
> Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned 
> scalar int
>
> On Tue, May 28, 2024 at 10:29 AM <[email protected]> wrote:
> >
> > From: Pan Li <[email protected]>
> >
> > This patch would like to add the middle-end presentation for the
> > saturation sub.  Aka set the result of add to the min when downflow.
> > It will take the pattern similar as below.
> >
> > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));
> >
> > For example for uint8_t, we have
> >
> > * SAT_SUB (255, 0)   => 255
> > * SAT_SUB (1, 2)     => 0
> > * SAT_SUB (254, 255) => 0
> > * SAT_SUB (0, 255)   => 0
> >
> > Given below SAT_SUB for uint64
> >
> > uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
> > {
> >   return (x + y) & (- (uint64_t)((x >= y)));
> > }


Is the above testcase correct? You need "(x + y)" as the first term.

BTW: After applying your patch, I'm not able to produce .SAT_SUB with
x86_64 and the following testcase:

--cut here--
typedef unsigned short T;

void foo (T *out, T *x, T *y, int n)
{
  int i;

  for (i = 0; i < n; i++)
    out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
}
--cut here--

with gcc -O2 -ftree-vectorize -msse2

I think that all relevant optabs were added for x86 in

https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b59de4113262f2bee14147eb17eb3592f03d9556

as part of the commit for PR112600, comment 8.

Uros.

> >
> > Before this patch:
> > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> > {
> >   _Bool _1;
> >   long unsigned int _3;
> >   uint64_t _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _1 = x_4(D) >= y_5(D);
> >   _3 = x_4(D) - y_5(D);
> >   _6 = _1 ? _3 : 0;
> >   return _6;
> > ;;    succ:       EXIT
> > }
> >
> > After this patch:
> > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> > {
> >   uint64_t _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> >   return _6;
> > ;;    succ:       EXIT
> > }
> >
> > The below tests are running for this patch:
> > *. The riscv fully regression tests.
> > *. The x86 bootstrap tests.
> > *. The x86 fully regression tests.
>
> OK.
>
> Thanks,
> Richard.
>
> >         PR target/51492
> >         PR target/112600
> >
> > gcc/ChangeLog:
> >
> >         * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
> >         * match.pd: Add new match for SAT_SUB.
> >         * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
> >         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
> >         new decl for generated in match.pd.
> >         (build_saturation_binary_arith_call): Add new helper function
> >         to build the gimple call to binary SAT alu.
> >         (match_saturation_arith): Rename from.
> >         (match_unsigned_saturation_add): Rename to.
> >         (match_unsigned_saturation_sub): Add new func to match the
> >         unsigned sat sub.
> >         (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
> >         try when COND_EXPR.
> >
> > Signed-off-by: Pan Li <[email protected]>
> > ---
> >  gcc/internal-fn.def       |  1 +
> >  gcc/match.pd              | 14 ++++++++
> >  gcc/optabs.def            |  4 +--
> >  gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------
> >  4 files changed, 64 insertions(+), 22 deletions(-)
> >
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 25badbb86e5..24539716e5b 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | 
> > ECF_NOTHROW, first,
> >                               smulhrs, umulhrs, binary)
> >
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, 
> > binary)
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, 
> > binary)
> >
> >  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
> >  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 024e3350465..3e334533ff8 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  (match (unsigned_integer_sat_add @0 @1)
> >   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> >
> > +/* Unsigned saturation sub, case 1 (branch with gt):
> > +   SAT_U_SUB = X > Y ? X - Y : 0  */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 2 (branch with ge):
> > +   SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 3f2cb46aff8..bc2611abdc2 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
> >  OPTAB_NX(sub_optab, "sub$Q$a3")
> >  OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
> >  OPTAB_VX(subv_optab, "sub$F$a3")
> > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', 
> > gen_signed_fixed_libfunc)
> > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', 
> > gen_unsigned_fixed_libfunc)
> > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', 
> > gen_signed_fixed_libfunc)
> > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', 
> > gen_unsigned_fixed_libfunc)
> >  OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', 
> > gen_int_fp_fixed_libfunc)
> >  OPTAB_NX(smul_optab, "mul$P$a3")
> >  OPTAB_NX(smul_optab, "mul$F$a3")
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 62da1c5ee08..4717302b728 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple 
> > *cast_stmt, gimple *&use_stmt,
> >  }
> >
> >  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> > +
> > +static void
> > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn 
> > fn,
> > +                                   tree lhs, tree op_0, tree op_1)
> > +{
> > +  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), 
> > OPTIMIZE_FOR_BOTH))
> > +    {
> > +      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
> > +      gimple_call_set_lhs (call, lhs);
> > +      gsi_replace (gsi, call, /* update_eh_info */ true);
> > +    }
> > +}
> >
> >  /*
> > - * Try to match saturation arith pattern(s).
> > - *   1. SAT_ADD (unsigned)
> > - *      _7 = _4 + _6;
> > - *      _8 = _4 > _7;
> > - *      _9 = (long unsigned int) _8;
> > - *      _10 = -_9;
> > - *      _12 = _7 | _10;
> > - *      =>
> > - *      _12 = .SAT_ADD (_4, _6);  */
> > + * Try to match saturation unsigned add.
> > + *   _7 = _4 + _6;
> > + *   _8 = _4 > _7;
> > + *   _9 = (long unsigned int) _8;
> > + *   _10 = -_9;
> > + *   _12 = _7 | _10;
> > + *   =>
> > + *   _12 = .SAT_ADD (_4, _6);  */
> > +
> >  static void
> > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >  {
> > -  gcall *call = NULL;
> > +  tree ops[2];
> > +  tree lhs = gimple_assign_lhs (stmt);
> >
> > +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
> > +    build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], 
> > ops[1]);
> > +}
> > +
> > +/*
> > + * Try to match saturation unsigned sub.
> > + *   _1 = _4 >= _5;
> > + *   _3 = _4 - _5;
> > + *   _6 = _1 ? _3 : 0;
> > + *   =>
> > + *   _6 = .SAT_SUB (_4, _5);  */
> > +
> > +static void
> > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> > +{
> >    tree ops[2];
> >    tree lhs = gimple_assign_lhs (stmt);
> >
> > -  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> > -      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> > -                                        OPTIMIZE_FOR_BOTH))
> > -    {
> > -      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > -      gimple_call_set_lhs (call, lhs);
> > -      gsi_replace (gsi, call, true);
> > -    }
> > +  if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
> > +    build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], 
> > ops[1]);
> >  }
> >
> >  /* Recognize for unsigned x
> > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block 
> > bb)
> >               break;
> >
> >             case BIT_IOR_EXPR:
> > -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > +             match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
> >               /* fall-through  */
> >             case BIT_XOR_EXPR:
> >               match_uaddc_usubc (&gsi, stmt, code);
> > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children 
> > (basic_block bb)
> >               match_single_bit_test (&gsi, stmt);
> >               break;
> >
> > +           case COND_EXPR:
> > +             match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > +             break;
> > +
> >             default:;
> >             }
> >         }
> > --
> > 2.34.1
> >

Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

Reply via email to