Thanks Richard for comments.
> The convert looks odd to me given @0 is involved in both & operands.
The convert is introduced as the GIMPLE IL is somehow different for int8_t when
compares to int32_t or int64_t.
There are some additional ops convert to unsigned for plus, see below line 8-9
and line 22-23.
But we cannot see similar GIMPLE IL for int32_t and int64_t. To reconcile the
types from int8_t to int64_t, add the
convert here.
Or may be I have some mistake in the example, let me revisit it and send v2 if
no surprise.
4 │ __attribute__((noinline))
5 │ int8_t sat_s_add_int8_t_fmt_1 (int8_t x, int8_t y)
6 │ {
7 │ int8_t sum;
8 │ unsigned char x.1_1;
9 │ unsigned char y.2_2;
10 │ unsigned char _3;
11 │ signed char _4;
12 │ signed char _5;
13 │ int8_t _6;
14 │ _Bool _11;
15 │ signed char _12;
16 │ signed char _13;
17 │ signed char _14;
18 │ signed char _22;
19 │ signed char _23;
20 │
21 │ <bb 2> [local count: 1073741822]:
22 │ x.1_1 = (unsigned char) x_7(D);
23 │ y.2_2 = (unsigned char) y_8(D);
24 │ _3 = x.1_1 + y.2_2;
25 │ sum_9 = (int8_t) _3;
26 │ _4 = x_7(D) ^ y_8(D);
27 │ _5 = x_7(D) ^ sum_9;
28 │ _23 = ~_4;
29 │ _22 = _5 & _23;
30 │ if (_22 < 0)
31 │ goto <bb 3>; [41.00%]
32 │ else
33 │ goto <bb 4>; [59.00%]
34 │
35 │ <bb 3> [local count: 259738146]:
36 │ _11 = x_7(D) < 0;
37 │ _12 = (signed char) _11;
38 │ _13 = -_12;
39 │ _14 = _13 ^ 127;
40 │
41 │ <bb 4> [local count: 1073741824]:
42 │ # _6 = PHI <_14(3), sum_9(2)>
43 │ return _6;
44 │
45 │ }
Pan
-----Original Message-----
From: Richard Biener <[email protected]>
Sent: Monday, August 5, 2024 7:16 PM
To: Li, Pan2 <[email protected]>
Cc: [email protected]; [email protected]; [email protected];
[email protected]; [email protected]
Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD
On Mon, Aug 5, 2024 at 9:14 AM <[email protected]> wrote:
>
> From: Pan Li <[email protected]>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD. Aka below example:
>
> Form 1:
> #define DEF_SAT_S_ADD_FMT_1(T) \
> T __attribute__((noinline)) \
> sat_s_add_##T##_fmt_1 (T x, T y) \
> { \
> T min = (T)1u << (sizeof (T) * 8 - 1); \
> T max = min - 1; \
> return (x ^ y) < 0 \
> ? (T)(x + y) \
> : ((T)(x + y) ^ x) >= 0 \
> ? (T)(x + y) \
> : x < 0 ? min : max; \
> }
>
> DEF_SAT_S_ADD_FMT_1 (int64_t)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
> 4 │ __attribute__((noinline))
> 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> 6 │ {
> 7 │ long int _1;
> 8 │ long int _2;
> 9 │ long int _3;
> 10 │ int64_t _4;
> 11 │ long int _7;
> 12 │ _Bool _9;
> 13 │ long int _10;
> 14 │ long int _11;
> 15 │ long int _12;
> 16 │ long int _13;
> 17 │
> 18 │ ;; basic block 2, loop depth 0
> 19 │ ;; pred: ENTRY
> 20 │ _1 = x_5(D) ^ y_6(D);
> 21 │ _13 = x_5(D) + y_6(D);
> 22 │ _3 = x_5(D) ^ _13;
> 23 │ _2 = ~_1;
> 24 │ _7 = _2 & _3;
> 25 │ if (_7 >= 0)
> 26 │ goto <bb 4>; [59.00%]
> 27 │ else
> 28 │ goto <bb 3>; [41.00%]
> 29 │ ;; succ: 4
> 30 │ ;; 3
> 31 │
> 32 │ ;; basic block 3, loop depth 0
> 33 │ ;; pred: 2
> 34 │ _9 = x_5(D) < 0;
> 35 │ _10 = (long int) _9;
> 36 │ _11 = -_10;
> 37 │ _12 = _11 ^ 9223372036854775807;
> 38 │ ;; succ: 4
> 39 │
> 40 │ ;; basic block 4, loop depth 0
> 41 │ ;; pred: 2
> 42 │ ;; 3
> 43 │ # _4 = PHI <_13(2), _12(3)>
> 44 │ return _4;
> 45 │ ;; succ: EXIT
> 46 │
> 47 │ }
>
> After this patch:
> 4 │ __attribute__((noinline))
> 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> 6 │ {
> 7 │ int64_t _4;
> 8 │
> 9 │ ;; basic block 2, loop depth 0
> 10 │ ;; pred: ENTRY
> 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> 12 │ return _4;
> 13 │ ;; succ: EXIT
> 14 │
> 15 │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add the matching for signed .SAT_ADD.
> * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> matching func decl.
> (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> to ...
> (match_saturation_add): ... here.
> (math_opts_dom_walker::after_dom_children): Update the above renamed
> func from caller.
>
> Signed-off-by: Pan Li <[email protected]>
> ---
> gcc/match.pd | 14 +++++++++++++
> gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
> 2 files changed, 51 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c9c8478d286..0a2ffc733d3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> }
> (if (otype_precision < itype_precision && wi::eq_p (trunc_max,
> int_cst))))))
>
> +/* Signed saturation add, case 1:
> + T min = (T)1u << (sizeof (T) * 8 - 1);
> + T max = min - 1;
> + SAT_S_ADD = (X ^ Y) < 0
> + ? (X + Y)
> + : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max. */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))
This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
The GIMPLE IL in the comment below suggests Z == X + Y?
The convert looks odd to me given @0 is involved in both & operands.
The comment above has the same logic error.
I believe the bit_xor lack :c
> + integer_zerop)
Please indent this to line up with the first operand of the 'ge' to make it
better readable.
> + (convert? (plus@2 (convert1? @0) (convert1? @1)))
Same with the converts. The plus needs :c I think. Is this about
common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?
Note all the :c and conditional converts makes this a quite heavy pattern
(all combinations of swaps and converts gets code).
> + (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..d5c9b475f72 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree,
> tree*, tree (*)(tree));
> extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> static void
> build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn
> fn,
> tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator
> *gsi, gassign *stmt)
> }
>
> /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
> * <bb 2> :
> * _1 = x_3(D) + y_4(D);
> * if (_1 >= x_3(D))
> @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator
> *gsi, gassign *stmt)
> * # _2 = PHI <255(2), _1(3)>
> * =>
> * <bb 4> [local count: 1073741824]:
> - * _2 = .SAT_ADD (x_4(D), y_5(D)); */
> + * _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + * _1 = x_5(D) ^ y_6(D);
> + * _13 = x_5(D) + y_6(D);
> + * _3 = x_5(D) ^ _13;
> + * _2 = ~_1;
> + * _7 = _2 & _3;
> + * if (_7 >= 0)
> + * goto <bb 4>; [59.00%]
> + * else
> + * goto <bb 3>; [41.00%]
> + * ;; succ: 4
> + * ;; 3
> + * ;; basic block 3, loop depth 0
> + * ;; pred: 2
> + * _9 = x_5(D) < 0;
> + * _10 = (long int) _9;
> + * _11 = -_10;
> + * _12 = _11 ^ 9223372036854775807;
> + * ;; succ: 4
> + * ;; basic block 4, loop depth 0
> + * ;; pred: 2
> + * ;; 3
> + * # _4 = PHI <_13(2), _12(3)>
> + * =>
> + * ;; basic block 2, loop depth 0
> + * ;; pred: ENTRY
> + * _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
>
> static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> {
> if (gimple_phi_num_args (phi) != 2)
> return;
> @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator
> *gsi, gphi *phi)
> tree ops[2];
> tree phi_result = gimple_phi_result (phi);
>
> - if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> + || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> ops[0], ops[1]);
> }
> @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block
> bb)
> gsi_next (&psi))
> {
> gimple_stmt_iterator gsi = gsi_after_labels (bb);
> - match_unsigned_saturation_add (&gsi, psi.phi ());
> + match_saturation_add (&gsi, psi.phi ());
> match_unsigned_saturation_sub (&gsi, psi.phi ());
> }
>
> --
> 2.43.0
>