On Wed, Jun 5, 2024 at 9:38 AM Li, Pan2 <pan2...@intel.com> wrote: > > Thanks Richard, will commit after the rebased pass the regression test. > > Pan > > -----Original Message----- > From: Richard Biener <richard.guent...@gmail.com> > Sent: Wednesday, June 5, 2024 3:19 PM > To: Li, Pan2 <pan2...@intel.com> > Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@gmail.com; > tamar.christ...@arm.com > Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned > scalar int > > On Tue, May 28, 2024 at 10:29 AM <pan2...@intel.com> wrote: > > > > From: Pan Li <pan2...@intel.com> > > > > This patch would like to add the middle-end presentation for the > > saturation sub. Aka set the result of add to the min when downflow. > > It will take the pattern similar as below. > > > > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)); > > > > For example for uint8_t, we have > > > > * SAT_SUB (255, 0) => 255 > > * SAT_SUB (1, 2) => 0 > > * SAT_SUB (254, 255) => 0 > > * SAT_SUB (0, 255) => 0 > > > > Given below SAT_SUB for uint64 > > > > uint64_t sat_sub_u64 (uint64_t x, uint64_t y) > > { > > return (x + y) & (- (uint64_t)((x >= y))); > > }
Is the above testcase correct? You need "(x + y)" as the first term. BTW: After applying your patch, I'm not able to produce .SAT_SUB with x86_64 and the following testcase: --cut here-- typedef unsigned short T; void foo (T *out, T *x, T *y, int n) { int i; for (i = 0; i < n; i++) out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i])); } --cut here-- with gcc -O2 -ftree-vectorize -msse2 I think that all relevant optabs were added for x86 in https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b59de4113262f2bee14147eb17eb3592f03d9556 as part of the commit for PR112600, comment 8. Uros. > > > > Before this patch: > > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > > { > > _Bool _1; > > long unsigned int _3; > > uint64_t _6; > > > > ;; basic block 2, loop depth 0 > > ;; pred: ENTRY > > _1 = x_4(D) >= y_5(D); > > _3 = x_4(D) - y_5(D); > > _6 = _1 ? _3 : 0; > > return _6; > > ;; succ: EXIT > > } > > > > After this patch: > > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > > { > > uint64_t _6; > > > > ;; basic block 2, loop depth 0 > > ;; pred: ENTRY > > _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call] > > return _6; > > ;; succ: EXIT > > } > > > > The below tests are running for this patch: > > *. The riscv fully regression tests. > > *. The x86 bootstrap tests. > > *. The x86 fully regression tests. > > OK. > > Thanks, > Richard. > > > PR target/51492 > > PR target/112600 > > > > gcc/ChangeLog: > > > > * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB. > > * match.pd: Add new match for SAT_SUB. > > * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub. > > * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add > > new decl for generated in match.pd. > > (build_saturation_binary_arith_call): Add new helper function > > to build the gimple call to binary SAT alu. > > (match_saturation_arith): Rename from. > > (match_unsigned_saturation_add): Rename to. > > (match_unsigned_saturation_sub): Add new func to match the > > unsigned sat sub. > > (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching > > try when COND_EXPR. > > > > Signed-off-by: Pan Li <pan2...@intel.com> > > --- > > gcc/internal-fn.def | 1 + > > gcc/match.pd | 14 ++++++++ > > gcc/optabs.def | 4 +-- > > gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------ > > 4 files changed, 64 insertions(+), 22 deletions(-) > > > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > > index 25badbb86e5..24539716e5b 100644 > > --- a/gcc/internal-fn.def > > +++ b/gcc/internal-fn.def > > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | > > ECF_NOTHROW, first, > > smulhrs, umulhrs, binary) > > > > DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, > > binary) > > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, > > binary) > > > > DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) > > DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 024e3350465..3e334533ff8 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > (match (unsigned_integer_sat_add @0 @1) > > (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > > > > +/* Unsigned saturation sub, case 1 (branch with gt): > > + SAT_U_SUB = X > Y ? X - Y : 0 */ > > +(match (unsigned_integer_sat_sub @0 @1) > > + (cond (gt @0 @1) (minus @0 @1) integer_zerop) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)))) > > + > > +/* Unsigned saturation sub, case 2 (branch with ge): > > + SAT_U_SUB = X >= Y ? X - Y : 0. */ > > +(match (unsigned_integer_sat_sub @0 @1) > > + (cond (ge @0 @1) (minus @0 @1) integer_zerop) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)))) > > + > > /* x > y && x != XXX_MIN --> x > y > > x > y && x == XXX_MIN --> false . */ > > (for eqne (eq ne) > > diff --git a/gcc/optabs.def b/gcc/optabs.def > > index 3f2cb46aff8..bc2611abdc2 100644 > > --- a/gcc/optabs.def > > +++ b/gcc/optabs.def > > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3") > > OPTAB_NX(sub_optab, "sub$Q$a3") > > OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc) > > OPTAB_VX(subv_optab, "sub$F$a3") > > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', > > gen_signed_fixed_libfunc) > > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', > > gen_unsigned_fixed_libfunc) > > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', > > gen_signed_fixed_libfunc) > > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', > > gen_unsigned_fixed_libfunc) > > OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', > > gen_int_fp_fixed_libfunc) > > OPTAB_NX(smul_optab, "mul$P$a3") > > OPTAB_NX(smul_optab, "mul$F$a3") > > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > > index 62da1c5ee08..4717302b728 100644 > > --- a/gcc/tree-ssa-math-opts.cc > > +++ b/gcc/tree-ssa-math-opts.cc > > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple > > *cast_stmt, gimple *&use_stmt, > > } > > > > extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); > > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); > > + > > +static void > > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn > > fn, > > + tree lhs, tree op_0, tree op_1) > > +{ > > + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), > > OPTIMIZE_FOR_BOTH)) > > + { > > + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); > > + gimple_call_set_lhs (call, lhs); > > + gsi_replace (gsi, call, /* update_eh_info */ true); > > + } > > +} > > > > /* > > - * Try to match saturation arith pattern(s). > > - * 1. SAT_ADD (unsigned) > > - * _7 = _4 + _6; > > - * _8 = _4 > _7; > > - * _9 = (long unsigned int) _8; > > - * _10 = -_9; > > - * _12 = _7 | _10; > > - * => > > - * _12 = .SAT_ADD (_4, _6); */ > > + * Try to match saturation unsigned add. > > + * _7 = _4 + _6; > > + * _8 = _4 > _7; > > + * _9 = (long unsigned int) _8; > > + * _10 = -_9; > > + * _12 = _7 | _10; > > + * => > > + * _12 = .SAT_ADD (_4, _6); */ > > + > > static void > > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) > > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) > > { > > - gcall *call = NULL; > > + tree ops[2]; > > + tree lhs = gimple_assign_lhs (stmt); > > > > + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)) > > + build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], > > ops[1]); > > +} > > + > > +/* > > + * Try to match saturation unsigned sub. > > + * _1 = _4 >= _5; > > + * _3 = _4 - _5; > > + * _6 = _1 ? _3 : 0; > > + * => > > + * _6 = .SAT_SUB (_4, _5); */ > > + > > +static void > > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) > > +{ > > tree ops[2]; > > tree lhs = gimple_assign_lhs (stmt); > > > > - if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) > > - && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), > > - OPTIMIZE_FOR_BOTH)) > > - { > > - call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); > > - gimple_call_set_lhs (call, lhs); > > - gsi_replace (gsi, call, true); > > - } > > + if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) > > + build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], > > ops[1]); > > } > > > > /* Recognize for unsigned x > > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block > > bb) > > break; > > > > case BIT_IOR_EXPR: > > - match_saturation_arith (&gsi, as_a<gassign *> (stmt)); > > + match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); > > /* fall-through */ > > case BIT_XOR_EXPR: > > match_uaddc_usubc (&gsi, stmt, code); > > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children > > (basic_block bb) > > match_single_bit_test (&gsi, stmt); > > break; > > > > + case COND_EXPR: > > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); > > + break; > > + > > default:; > > } > > } > > -- > > 2.34.1 > >