On Thu, Oct 31, 2024 at 7:29 AM <[email protected]> wrote:
>
> From: Pan Li <[email protected]>
>
> There are sorts of forms for the unsigned SAT_ADD. Some of them are
> complicated while others are cheap. This patch would like to simplify
> the complicated form into the cheap ones. For example as below:
>
> From the form 4 (branch):
> SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).
>
> To (branchless):
> SAT_U_ADD = (X + Y) | - ((X + Y) < X).
>
> #define T uint8_t
>
> T sat_add_u_1 (T x, T y)
> {
> return (T)(x + y) < x ? -1 : (x + y);
> }
>
> Before this patch:
> 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y)
> 2 │ {
> 3 │ uint8_t D.2809;
> 4 │
> 5 │ _1 = x + y;
> 6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>;
> 7 │ <D.2810>:
> 8 │ D.2809 = x + y;
> 9 │ goto <D.2812>;
> 10 │ <D.2811>:
> 11 │ D.2809 = 255;
> 12 │ <D.2812>:
> 13 │ return D.2809;
> 14 │ }
>
> After this patch:
> 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y)
> 2 │ {
> 3 │ uint8_t D.2809;
> 4 │
> 5 │ _1 = x + y;
> 6 │ _2 = x + y;
> 7 │ _3 = x > _2;
> 8 │ _4 = (unsigned char) _3;
> 9 │ _5 = -_4;
> 10 │ D.2809 = _1 | _5;
> 11 │ return D.2809;
> 12 │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Remove unsigned branch form 4 for SAT_ADD, and
> add simplify to branchless instead.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/sat_arith_simplify.h: New test.
> * gcc.dg/sat_u_add-simplify-2-u16.c: New test.
> * gcc.dg/sat_u_add-simplify-2-u32.c: New test.
> * gcc.dg/sat_u_add-simplify-2-u64.c: New test.
> * gcc.dg/sat_u_add-simplify-2-u8.c: New test.
>
> Signed-off-by: Pan Li <[email protected]>
> ---
> gcc/match.pd | 23 +++++++++----------
> gcc/testsuite/gcc.dg/sat_arith_simplify.h | 10 ++++++++
> .../gcc.dg/sat_u_add-simplify-2-u16.c | 11 +++++++++
> .../gcc.dg/sat_u_add-simplify-2-u32.c | 11 +++++++++
> .../gcc.dg/sat_u_add-simplify-2-u64.c | 11 +++++++++
> .../gcc.dg/sat_u_add-simplify-2-u8.c | 11 +++++++++
> 6 files changed, 65 insertions(+), 12 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/sat_arith_simplify.h
> create mode 100644 gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u16.c
> create mode 100644 gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u32.c
> create mode 100644 gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u64.c
> create mode 100644 gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u8.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c851ac56e37..8425d7c4f20 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3146,18 +3146,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (match (unsigned_integer_sat_add @0 @1)
> (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> -/* Simplify SAT_U_ADD to the cheap form
> - From: SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.
> - To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */
> -(simplify (cond (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep)
> - (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> - && types_match (type, @0, @1))
> - (bit_ior @2 (negate (convert (lt @2 @0))))))
> -
> -/* Unsigned saturation add, case 4 (branch with lt):
> - SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */
> -(match (unsigned_integer_sat_add @0 @1)
> - (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> +/* Simplify sorts of SAT_U_ADD forms to the cheap one.
> + The cheap form: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */
> +(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
> + /* From SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */
> + (simplify (cond (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep)
> + (if (types_match (type, @0, @1))
> + (bit_ior (plus@2 @0 @1) (negate (convert (lt @2 @0))))))
> + /* From SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */
> + (simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2)
> + (if (types_match (type, @0, @1))
> + (bit_ior (plus@2 @0 @1) (negate (convert (lt @2 @0)))))))
Sorry for falling back in reviewing - it's not exactly clear the "cheap" form is
cheaper. When I count the number of gimple statements (sub-expressions)
the original appears as 3 while the result looks to have 5. The catch is of
course that the original might involve control flow and a PHI.
The pattern will apply during late PHI-OPT or during GENERIC folding
done by the frontends or gimplfiication - you scan the gimplification dump
below, so having it apply there means it will influence inlining heuristics
for example. I wonder which variant is considered larger by its heuristic.
What do others think of the early canonicalization of these to
straight-line code?
Thanks,
Richard.
> /* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */
> diff --git a/gcc/testsuite/gcc.dg/sat_arith_simplify.h
> b/gcc/testsuite/gcc.dg/sat_arith_simplify.h
> new file mode 100644
> index 00000000000..46ac00426b2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sat_arith_simplify.h
> @@ -0,0 +1,10 @@
> +#ifndef HAVE_DEFINED_SAT_ARITH_SIMPLIFY_H
> +#define HAVE_DEFINED_SAT_ARITH_SIMPLIFY_H
> +
> +#define DEF_SAT_U_ADD_2(T) \
> +T sat_u_add_##T##_2 (T x, T y) \
> +{ \
> + return (T)(x + y) < x ? -1 : (x + y); \
> +}
> +
> +#endif
> diff --git a/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u16.c
> b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u16.c
> new file mode 100644
> index 00000000000..b170b35096c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u16.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-gimple-details" } */
> +
> +#include <stdint.h>
> +#include "sat_arith_simplify.h"
> +
> +DEF_SAT_U_ADD_2 (uint16_t)
> +
> +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */
> diff --git a/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u32.c
> b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u32.c
> new file mode 100644
> index 00000000000..8830ed7b878
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u32.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-gimple-details" } */
> +
> +#include <stdint.h>
> +#include "sat_arith_simplify.h"
> +
> +DEF_SAT_U_ADD_2 (uint32_t)
> +
> +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */
> diff --git a/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u64.c
> b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u64.c
> new file mode 100644
> index 00000000000..76c4d4bddaa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u64.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-gimple-details" } */
> +
> +#include <stdint.h>
> +#include "sat_arith_simplify.h"
> +
> +DEF_SAT_U_ADD_2 (uint64_t)
> +
> +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */
> diff --git a/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u8.c
> b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u8.c
> new file mode 100644
> index 00000000000..b034b8eedb1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sat_u_add-simplify-2-u8.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-gimple-details" } */
> +
> +#include <stdint.h>
> +#include "sat_arith_simplify.h"
> +
> +DEF_SAT_U_ADD_2 (uint8_t)
> +
> +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
> +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */
> --
> 2.43.0
>