From: Pan Li <pan2...@intel.com> There are several forms of the unsigned SAT_ADD. Some of them are complicated while others are cheap. This patch simplifies the complicated forms into the cheap ones. For example, as below:
From the form 4 (branch): SUM = ADD_OVERFLOW (X, Y) SAT_U_ADD = IMAGPART_EXPR (SUM) == 0 ? REALPART_EXPR (SUM) : -1 To (branchless): SAT_U_ADD = (X + Y) | - ((X + Y) < X). #define T uint8_t T sat_add_u_1 (T x, T y) { T ret; return __builtin_add_overflow (x, y, &ret) == 0 ? ret : - 1; } Before this patch in phiopt2: 4 │ uint8_t sat_u_add_uint8_t_9 (uint8_t x, uint8_t y) 5 │ { 6 │ unsigned char _1; 7 │ unsigned char _2; 8 │ uint8_t _3; 9 │ __complex__ unsigned char _6; 10 │ 11 │ <bb 2> [local count: 1073741824]: 12 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 13 │ _1 = REALPART_EXPR <_6>; 14 │ _2 = IMAGPART_EXPR <_6>; 15 │ if (_2 != 0) 16 │ goto <bb 3>; [35.00%] 17 │ else 18 │ goto <bb 4>; [65.00%] 19 │ 20 │ <bb 3> [local count: 375809640]: 21 │ 22 │ <bb 4> [local count: 1073741824]: 23 │ # _3 = PHI <255(3), _1(2)> 24 │ return _3; 25 │ 26 │ } After this patch: 14 │ uint8_t sat_u_add_uint8_t_9 (uint8_t x, uint8_t y) 15 │ { 16 │ unsigned char _1; 17 │ __complex__ unsigned char _6; 18 │ unsigned char _8; 19 │ _Bool _9; 20 │ unsigned char _10; 21 │ unsigned char _11; 22 │ unsigned char _12; 23 │ 24 │ <bb 2> [local count: 1073741824]: 25 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); // Dead code 26 │ _1 = REALPART_EXPR <_6>; // Ditto 27 │ _8 = x_4(D) + y_5(D); 28 │ _9 = x_4(D) > _8; 29 │ _10 = (unsigned char) _9; 30 │ _11 = -_10; 31 │ _12 = _8 | _11; 32 │ return _12; 33 │ 34 │ } This patch also adds 2 more imm matching patterns for unsigned SAT_ADD due to the removal of the add_overflow match. The test suites below pass with this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Remove unsigned branch form 4 and simplify to the branchless form of unsigned SAT_ADD, add 2 new forms for imm. 
Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/match.pd | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index f6f55491b89..8ed08b95bc0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3160,14 +3160,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* From SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ (simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2) (if (types_match (type, @0, @1)) - (bit_ior (plus@2 @0 @1) (negate (convert (lt @2 @0))))))) - -/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): - SUM = ADD_OVERFLOW (X, Y) - SAT_U_ADD = IMAGPART_EXPR (SUM) == 0 ? REALPART_EXPR (SUM) : -1. */ -(match (unsigned_integer_sat_add @0 @1) - (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) - (usadd_left_part_2 @0 @1) integer_minus_onep)) + (bit_ior (plus@2 @0 @1) (negate (convert (lt @2 @0)))))) + /* From SUM = ADD_OVERFLOW (X, Y) + SAT_U_ADD = IMAGPART_EXPR (SUM) == 0 ? REALPART_EXPR (SUM) : -1 */ + (simplify (cond (eq (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) + (realpart @2) integer_minus_onep) + (if (types_match (type, @0, @1)) + (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))))))) /* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW): SUM = ADD_OVERFLOW (X, Y) @@ -3201,6 +3200,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0) && int_fits_type_p (@1, type)))) +/* Unsigned saturation add, case 11 (one op is imm): + SAT_U_ADD = (X + 3) | -(X > (umax - 3)). 
*/ +(match (unsigned_integer_sat_add @0 @1) + (bit_ior:c (plus:c @0 INTEGER_CST@1) (negate (convert (gt @0 INTEGER_CST@2)))) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)) + (with + { + unsigned precision = TYPE_PRECISION (type); + wide_int cst_1 = wi::to_wide (@1); + wide_int cst_2 = wi::to_wide (@2); + wide_int max = wi::mask (precision, false, precision); + wide_int sum = wi::add (cst_1, cst_2); + } + (if (wi::eq_p (max, sum)))))) + +/* Unsigned saturation add, case 12 (one op is imm). Almost the same form + as the case 11 but the gimple changes when INTEGER_CST@1 is umax. */ +(match (unsigned_integer_sat_add @0 @1) + (bit_ior:c (plus:c @0 INTEGER_CST@1) (negate (convert (ne @0 integer_zerop)))) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)) + (with + { + unsigned precision = TYPE_PRECISION (type); + wide_int cst_1 = wi::to_wide (@1); + wide_int max = wi::mask (precision, false, precision); + } + (if (wi::eq_p (max, cst_1)))))) + /* Signed saturation add, case 1: T sum = (T)((UT)X + (UT)Y) SAT_S_ADD = (X ^ sum) & !(X ^ Y) < 0 ? (-(T)(X < 0) ^ MAX) : sum; -- 2.43.0