On Wed, Aug 23, 2023 at 11:51 PM Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > The patterns that were added in r13-4620-g4d9db4bdd458 missed that > (a > b) and (a <= b) are not inverses of each other for floating point > comparisons (if NaNs are supported). Even though there was a check for > integral types, it was only for the result of the cond rather than for the > type of what is being compared. The fix is to check to see if cmp and > icmp are inverses of each other by using the invert_tree_comparison function. > > OK for trunk and GCC 13 branch? Bootstrapped and tested on x86_64-linux-gnu > with no regressions.
OK. Thanks, Richard. > I added the testcase to execute/ieee as it requires support for NAN. > > PR tree-optimization/111109 > > gcc/ChangeLog: > > * match.pd (ior(cond,cond), ior(vec_cond,vec_cond)): > Add check to make sure cmp and icmp are inverse. > > gcc/testsuite/ChangeLog: > > * gcc.c-torture/execute/ieee/fp-cmp-cond-1.c: New test. > --- > gcc/match.pd | 11 ++- > .../execute/ieee/fp-cmp-cond-1.c | 78 +++++++++++++++++++ > 2 files changed, 86 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.c-torture/execute/ieee/fp-cmp-cond-1.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 85b7d323a19..b666d73b189 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -2087,6 +2087,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (bit_and:c (convert? (cmp@0 @01 @02)) @3) > (bit_and:c (convert? (icmp@4 @01 @02)) @5)) > (if (INTEGRAL_TYPE_P (type) > + && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp > /* The scalar version has to be canonicalized after vectorization > because it makes unconditional loads conditional ones, which > means we lose vectorization because the loads may trap. */ > @@ -2101,6 +2102,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (cond (cmp@0 @01 @02) @3 zerop) > (cond (icmp@4 @01 @02) @5 zerop)) > (if (INTEGRAL_TYPE_P (type) > + && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp > /* The scalar version has to be canonicalized after vectorization > because it makes unconditional loads conditional ones, which > means we lose vectorization because the loads may trap. 
*/ > @@ -2113,13 +2115,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (bit_ior > (bit_and:c (vec_cond:s (cmp@0 @6 @7) @4 @5) @2) > (bit_and:c (vec_cond:s (icmp@1 @6 @7) @4 @5) @3)) > - (if (integer_zerop (@5)) > + (if (integer_zerop (@5) > + && invert_tree_comparison (cmp, HONOR_NANS (@6)) == icmp) > (switch > (if (integer_onep (@4)) > (bit_and (vec_cond @0 @2 @3) @4)) > (if (integer_minus_onep (@4)) > (vec_cond @0 @2 @3))) > - (if (integer_zerop (@4)) > + (if (integer_zerop (@4) > + && invert_tree_comparison (cmp, HONOR_NANS (@6)) == icmp) > (switch > (if (integer_onep (@5)) > (bit_and (vec_cond @0 @3 @2) @5)) > @@ -2132,7 +2136,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (bit_ior > (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) > (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) > - (vec_cond @0 @2 @3))) > + (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) > + (vec_cond @0 @2 @3)))) > > /* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ > (simplify > diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/fp-cmp-cond-1.c > b/gcc/testsuite/gcc.c-torture/execute/ieee/fp-cmp-cond-1.c > new file mode 100644 > index 00000000000..4a3c4b0eee2 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/ieee/fp-cmp-cond-1.c > @@ -0,0 +1,78 @@ > +/* PR tree-optimization/111109 */ > + > +/* > + f should return 0 if either fa and fb are a nan. > + Rather than the value of a or b. > +*/ > +__attribute__((noipa)) > +int f(int a, int b, float fa, float fb) { > + const _Bool c = fa < fb; > + const _Bool c1 = fa >= fb; > + return (c * a) | (c1 * b); > +} > + > +/* > + f1 should return 0 if either fa and fb are a nan. > + Rather than the value of a&1 or b&1. 
> +*/ > +__attribute__((noipa)) > +int f1(int a, int b, float fa, float fb) { > + const _Bool c = fa < fb; > + const _Bool c1 = fa >= fb; > + return (c & a) | (c1 & b); > +} > + > +#if __SIZEOF_INT__ == __SIZEOF_FLOAT__ > +typedef int v4si __attribute__ ((vector_size (1*sizeof(int)))); > +typedef float v4sf __attribute__ ((vector_size (1*sizeof(float)))); > +/* > + fvf0 should return {0} if either fa and fb are a nan. > + Rather than the value of a or b. > +*/ > +__attribute__((noipa)) > +v4si vf0(v4si a, v4si b, v4sf fa, v4sf fb) { > + const v4si c = fa < fb; > + const v4si c1 = fa >= fb; > + return (c & a) | (c1 & b); > +} > + > + > +#endif > + > +int main(void) > +{ > + float a = __builtin_nan(""); > + > + if (f(-1,-1, a, a) != 0) > + __builtin_abort(); > + if (f(-1,-1, a, 0) != 0) > + __builtin_abort(); > + if (f(-1,-1, 0, a) != 0) > + __builtin_abort(); > + if (f(-1,-1, 0, 0) != -1) > + __builtin_abort(); > + > + > + if (f1(1,1, a, a) != 0) > + __builtin_abort(); > + if (f1(1,1, a, 0) != 0) > + __builtin_abort(); > + if (f1(1,1, 0, a) != 0) > + __builtin_abort(); > + if (f1(1,1, 0, 0) != 1) > + __builtin_abort(); > + > +#if __SIZEOF_INT__ == __SIZEOF_FLOAT__ > + v4si b = {-1}; > + v4sf c = {a}; > + v4sf d = {0.0}; > + if (vf0(b,b, c, c)[0] != 0) > + __builtin_abort(); > + if (vf0(b,b, c, d)[0] != 0) > + __builtin_abort(); > + if (vf0(b,b, d, c)[0] != 0) > + __builtin_abort(); > + if (vf0(b,b, d, d)[0] != b[0]) > + __builtin_abort(); > +#endif > +} > -- > 2.31.1 >