On Tue, Aug 6, 2024 at 3:21 AM Li, Pan2 <[email protected]> wrote:
>
> Hi Richard,
>
> It looks like the plus will have additional convert to unsigned in int8 and
> int16, see below example in test.c.006t.gimple.
> And we need these convert ops in one matching pattern to cover all int scalar
> types.
Ah, yeah - that's the usual (premature) frontend optimization to
shorten operations after the standard
mandated standard conversion (to 'int' in this case).
> I am not sure if there is a better way here, given convert in matching
> pattern is not very elegant up to a point.
>
> int16_t
> add_i16 (int16_t a, int16_t b)
> {
> int16_t sum = a + b;
> return sum;
> }
>
> int32_t
> add_i32 (int32_t a, int32_t b)
> {
> int32_t sum = a + b;
> return sum;
> }
>
> ------- 006t.gimple -------
> int16_t add_i16 (int16_t a, int16_t b)
> {
> int16_t D.2815;
> int16_t sum;
>
> a.0_1 = (unsigned short) a;
> b.1_2 = (unsigned short) b;
> _3 = a.0_1 + b.1_2;
> sum = (int16_t) _3;
> D.2815 = sum;
> return D.2815;
> }
>
> int32_t add_i32 (int32_t a, int32_t b)
> {
> int32_t D.2817;
> int32_t sum;
>
> sum = a + b;
> D.2817 = sum;
> return D.2817;
> }
>
> Pan
>
> -----Original Message-----
> From: Li, Pan2
> Sent: Monday, August 5, 2024 9:52 PM
> To: Richard Biener <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: RE: [PATCH v1] Match: Support form 1 for scalar signed integer
> .SAT_ADD
>
> Thanks Richard for comments.
>
> > The convert looks odd to me given @0 is involved in both & operands.
>
> The convert is introduced as the GIMPLE IL is somehow different for int8_t
> when compares to int32_t or int64_t.
> There are some additional ops convert to unsigned for plus, see below line
> 8-9 and line 22-23.
> But we cannot see similar GIMPLE IL for int32_t and int64_t. To reconcile the
> types from int8_t to int64_t, add the
> convert here.
>
> Or may be I have some mistake in the example, let me revisit it and send v2
> if no surprise.
>
> 4 │ __attribute__((noinline))
> 5 │ int8_t sat_s_add_int8_t_fmt_1 (int8_t x, int8_t y)
> 6 │ {
> 7 │ int8_t sum;
> 8 │ unsigned char x.1_1;
> 9 │ unsigned char y.2_2;
> 10 │ unsigned char _3;
> 11 │ signed char _4;
> 12 │ signed char _5;
> 13 │ int8_t _6;
> 14 │ _Bool _11;
> 15 │ signed char _12;
> 16 │ signed char _13;
> 17 │ signed char _14;
> 18 │ signed char _22;
> 19 │ signed char _23;
> 20 │
> 21 │ <bb 2> [local count: 1073741822]:
> 22 │ x.1_1 = (unsigned char) x_7(D);
> 23 │ y.2_2 = (unsigned char) y_8(D);
> 24 │ _3 = x.1_1 + y.2_2;
> 25 │ sum_9 = (int8_t) _3;
> 26 │ _4 = x_7(D) ^ y_8(D);
> 27 │ _5 = x_7(D) ^ sum_9;
> 28 │ _23 = ~_4;
> 29 │ _22 = _5 & _23;
> 30 │ if (_22 < 0)
> 31 │ goto <bb 3>; [41.00%]
> 32 │ else
> 33 │ goto <bb 4>; [59.00%]
> 34 │
> 35 │ <bb 3> [local count: 259738146]:
> 36 │ _11 = x_7(D) < 0;
> 37 │ _12 = (signed char) _11;
> 38 │ _13 = -_12;
> 39 │ _14 = _13 ^ 127;
> 40 │
> 41 │ <bb 4> [local count: 1073741824]:
> 42 │ # _6 = PHI <_14(3), sum_9(2)>
> 43 │ return _6;
> 44 │
> 45 │ }
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <[email protected]>
> Sent: Monday, August 5, 2024 7:16 PM
> To: Li, Pan2 <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer
> .SAT_ADD
>
> On Mon, Aug 5, 2024 at 9:14 AM <[email protected]> wrote:
> >
> > From: Pan Li <[email protected]>
> >
> > This patch would like to support the form 1 of the scalar signed
> > integer .SAT_ADD. Aka below example:
> >
> > Form 1:
> > #define DEF_SAT_S_ADD_FMT_1(T) \
> > T __attribute__((noinline)) \
> > sat_s_add_##T##_fmt_1 (T x, T y) \
> > { \
> > T min = (T)1u << (sizeof (T) * 8 - 1); \
> > T max = min - 1; \
> > return (x ^ y) < 0 \
> > ? (T)(x + y) \
> > : ((T)(x + y) ^ x) >= 0 \
> > ? (T)(x + y) \
> > : x < 0 ? min : max; \
> > }
> >
> > DEF_SAT_S_ADD_FMT_1 (int64_t)
> >
> > We can tell the difference before and after this patch if backend
> > implemented the ssadd<m>3 pattern similar as below.
> >
> > Before this patch:
> > 4 │ __attribute__((noinline))
> > 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> > 6 │ {
> > 7 │ long int _1;
> > 8 │ long int _2;
> > 9 │ long int _3;
> > 10 │ int64_t _4;
> > 11 │ long int _7;
> > 12 │ _Bool _9;
> > 13 │ long int _10;
> > 14 │ long int _11;
> > 15 │ long int _12;
> > 16 │ long int _13;
> > 17 │
> > 18 │ ;; basic block 2, loop depth 0
> > 19 │ ;; pred: ENTRY
> > 20 │ _1 = x_5(D) ^ y_6(D);
> > 21 │ _13 = x_5(D) + y_6(D);
> > 22 │ _3 = x_5(D) ^ _13;
> > 23 │ _2 = ~_1;
> > 24 │ _7 = _2 & _3;
> > 25 │ if (_7 >= 0)
> > 26 │ goto <bb 4>; [59.00%]
> > 27 │ else
> > 28 │ goto <bb 3>; [41.00%]
> > 29 │ ;; succ: 4
> > 30 │ ;; 3
> > 31 │
> > 32 │ ;; basic block 3, loop depth 0
> > 33 │ ;; pred: 2
> > 34 │ _9 = x_5(D) < 0;
> > 35 │ _10 = (long int) _9;
> > 36 │ _11 = -_10;
> > 37 │ _12 = _11 ^ 9223372036854775807;
> > 38 │ ;; succ: 4
> > 39 │
> > 40 │ ;; basic block 4, loop depth 0
> > 41 │ ;; pred: 2
> > 42 │ ;; 3
> > 43 │ # _4 = PHI <_13(2), _12(3)>
> > 44 │ return _4;
> > 45 │ ;; succ: EXIT
> > 46 │
> > 47 │ }
> >
> > After this patch:
> > 4 │ __attribute__((noinline))
> > 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> > 6 │ {
> > 7 │ int64_t _4;
> > 8 │
> > 9 │ ;; basic block 2, loop depth 0
> > 10 │ ;; pred: ENTRY
> > 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> > 12 │ return _4;
> > 13 │ ;; succ: EXIT
> > 14 │
> > 15 │ }
> >
> > The below test suites are passed for this patch.
> > * The rv64gcv fully regression test.
> > * The x86 bootstrap test.
> > * The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> > * match.pd: Add the matching for signed .SAT_ADD.
> > * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> > matching func decl.
> > (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> > to ...
> > (match_saturation_add): ... here.
> > (math_opts_dom_walker::after_dom_children): Update the above renamed
> > func from caller.
> >
> > Signed-off-by: Pan Li <[email protected]>
> > ---
> > gcc/match.pd | 14 +++++++++++++
> > gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
> > 2 files changed, 51 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index c9c8478d286..0a2ffc733d3 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > }
> > (if (otype_precision < itype_precision && wi::eq_p (trunc_max,
> > int_cst))))))
> >
> > +/* Signed saturation add, case 1:
> > + T min = (T)1u << (sizeof (T) * 8 - 1);
> > + T max = min - 1;
> > + SAT_S_ADD = (X ^ Y) < 0
> > + ? (X + Y)
> > + : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max. */
> > +(match (signed_integer_sat_add @0 @1)
> > + (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0
> > @1)))
>
> This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
> The GIMPLE IL in the comment below suggests Z == X + Y?
>
> The convert looks odd to me given @0 is involved in both & operands.
> The comment above has the same logic error.
>
> I believe the bit_xor lack :c
>
> > + integer_zerop)
>
> Please indent this to line up with the first operand of the 'ge' to make it
> better readable.
>
> > + (convert? (plus@2 (convert1? @0) (convert1? @1)))
>
> Same with the converts. The plus needs :c I think. Is this about
> common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?
>
> Note all the :c and conditional converts makes this a quite heavy pattern
> (all combinations of swaps and converts gets code).
>
> > + (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> > + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> > + && types_match (type, @0, @1))))
> > +
> > /* x > y && x != XXX_MIN --> x > y
> > x > y && x == XXX_MIN --> false . */
> > (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 8d96a4c964b..d5c9b475f72 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree,
> > tree*, tree (*)(tree));
> > extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> > extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree
> > (*)(tree));
> >
> > +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> > +
> > static void
> > build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn
> > fn,
> > tree lhs, tree op_0, tree op_1)
> > @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator
> > *gsi, gassign *stmt)
> > }
> >
> > /*
> > - * Try to match saturation unsigned add with PHI.
> > + * Try to match saturation add with PHI.
> > + * For unsigned integer:
> > * <bb 2> :
> > * _1 = x_3(D) + y_4(D);
> > * if (_1 >= x_3(D))
> > @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator
> > *gsi, gassign *stmt)
> > * # _2 = PHI <255(2), _1(3)>
> > * =>
> > * <bb 4> [local count: 1073741824]:
> > - * _2 = .SAT_ADD (x_4(D), y_5(D)); */
> > + * _2 = .SAT_ADD (x_4(D), y_5(D));
> > + *
> > + * For signed integer:
> > + * _1 = x_5(D) ^ y_6(D);
> > + * _13 = x_5(D) + y_6(D);
> > + * _3 = x_5(D) ^ _13;
> > + * _2 = ~_1;
> > + * _7 = _2 & _3;
> > + * if (_7 >= 0)
> > + * goto <bb 4>; [59.00%]
> > + * else
> > + * goto <bb 3>; [41.00%]
> > + * ;; succ: 4
> > + * ;; 3
> > + * ;; basic block 3, loop depth 0
> > + * ;; pred: 2
> > + * _9 = x_5(D) < 0;
> > + * _10 = (long int) _9;
> > + * _11 = -_10;
> > + * _12 = _11 ^ 9223372036854775807;
> > + * ;; succ: 4
> > + * ;; basic block 4, loop depth 0
> > + * ;; pred: 2
> > + * ;; 3
> > + * # _4 = PHI <_13(2), _12(3)>
> > + * =>
> > + * ;; basic block 2, loop depth 0
> > + * ;; pred: ENTRY
> > + * _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
> >
> > static void
> > -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> > +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> > {
> > if (gimple_phi_num_args (phi) != 2)
> > return;
> > @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator
> > *gsi, gphi *phi)
> > tree ops[2];
> > tree phi_result = gimple_phi_result (phi);
> >
> > - if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> > + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> > + || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> > build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> > ops[0], ops[1]);
> > }
> > @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block
> > bb)
> > gsi_next (&psi))
> > {
> > gimple_stmt_iterator gsi = gsi_after_labels (bb);
> > - match_unsigned_saturation_add (&gsi, psi.phi ());
> > + match_saturation_add (&gsi, psi.phi ());
> > match_unsigned_saturation_sub (&gsi, psi.phi ());
> > }
> >
> > --
> > 2.43.0
> >