On Tue, Aug 13, 2024 at 6:31 AM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> r13-4620-g4d9db4bdd458 added a few patterns, and some of them can be extended
> to support XOR and PLUS.
> This extends the patterns to support XOR and PLUS instead of just IOR.
>
> Bootstrapped and tested on x86_64-linux-gnu.
OK. > PR tree-optimization/103660 > > gcc/ChangeLog: > > * match.pd (`((a CMP b) ? c : 0) | ((a CMP' b) ? d : 0)`): Extend to > support > XOR and PLUS. > > gcc/testsuite/ChangeLog: > > * g++.dg/tree-ssa/pr103660-2.C: New test. > * g++.dg/tree-ssa/pr103660-3.C: New test. > * gcc.dg/tree-ssa/pr103660-2.c: New test. > * gcc.dg/tree-ssa/pr103660-3.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/match.pd | 42 +++++++++++--------- > gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C | 30 +++++++++++++++ > gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C | 30 +++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c | 45 ++++++++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c | 35 +++++++++++++++++ > 5 files changed, 163 insertions(+), 19 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index c9c8478d286..b43ceb6def0 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -2356,18 +2356,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > /* Fold ((-(a < b) & c) | (-(a >= b) & d)) into a < b ? c : d. This is > canonicalized further and we recognize the conditional form: > - (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. */ > - (simplify > - (bit_ior > - (cond (cmp@0 @01 @02) @3 zerop) > - (cond (icmp@4 @01 @02) @5 zerop)) > - (if (INTEGRAL_TYPE_P (type) > - && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp > - /* The scalar version has to be canonicalized after vectorization > - because it makes unconditional loads conditional ones, which > - means we lose vectorization because the loads may trap. */ > - && canonicalize_math_after_vectorization_p ()) > - (cond @0 @3 @5))) > + (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. 
> + Handle also ^ and + in replacement of `|`. */ > + (for op (bit_ior bit_xor plus) > + (simplify > + (op > + (cond (cmp@0 @01 @02) @3 zerop) > + (cond (icmp@4 @01 @02) @5 zerop)) > + (if (INTEGRAL_TYPE_P (type) > + && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp > + /* The scalar version has to be canonicalized after vectorization > + because it makes unconditional loads conditional ones, which > + means we lose vectorization because the loads may trap. */ > + && canonicalize_math_after_vectorization_p ()) > + (cond @0 @3 @5)))) > > /* Vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d. > and ((~(a < b) & c) | (~(a >= b) & d)) into a < b ? c : d. */ > @@ -2391,13 +2393,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (vec_cond @0 @3 @2)))))) > > /* Scalar Vectorized Fold ((-(a < b) & c) | (-(a >= b) & d)) > - into a < b ? d : c. */ > - (simplify > - (bit_ior > - (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) > - (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) > - (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) > - (vec_cond @0 @2 @3)))) > + into a < b ? d : c. > + Handle also ^ and + in replacement of `|`. */ > + (for op (bit_ior bit_xor plus) > + (simplify > + (op > + (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) > + (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) > + (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) > + (vec_cond @0 @2 @3))))) > > /* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ > (simplify > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C > b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C > new file mode 100644 > index 00000000000..95205c02bc3 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C > @@ -0,0 +1,30 @@ > +/* PR tree-optimization/103660 */ > +/* Vector type version. 
*/ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ > + > +typedef int v4si __attribute((__vector_size__(4 * sizeof(int)))); > +#define funcs(OP,n) \ > +v4si min_##n(v4si a, v4si b) { \ > + v4si X = a < b ? a : 0; \ > + v4si Y = a >= b ? b : 0; \ > + return (X OP Y); \ > +} \ > +v4si f_##n(v4si a, v4si b, \ > + v4si c, v4si d) { \ > + v4si X = a < b ? c : 0; \ > + v4si Y = a >= b ? d : 0; \ > + return (X OP Y); \ > +} > + > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> or `a < b ? a : b` depending on > if the target > + supports min on the vector type or not. */ > +/* f_xor/f_plus should produce (a < b) ? c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), " 4 > "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 4 > "forwprop1" } } */ > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C > b/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C > new file mode 100644 > index 00000000000..0800ad8e90e > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C > @@ -0,0 +1,30 @@ > +/* PR tree-optimization/103660 */ > +/* Vector type version. */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ > + > +typedef int v4si __attribute((__vector_size__(4 * sizeof(int)))); > +#define funcs(OP,n) \ > +v4si min_##n(v4si a, v4si b) { \ > + v4si X = -(a < b) * a; \ > + v4si Y = -(a >= b) * b; \ > + return (X OP Y); \ > +} \ > +v4si f_##n(v4si a, v4si b, \ > + v4si c, v4si d) { \ > + v4si X = -(a < b) * c; \ > + v4si Y = -(a >= b) * d; \ > + return (X OP Y); \ > +} > + > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> or `a < b ? a : b` depending on > if the target > + supports min on the vector type or not. 
*/ > +/* f_xor/f_plus should produce (a < b) ? c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), " 4 > "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 4 > "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c > new file mode 100644 > index 00000000000..ce4da00a888 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c > @@ -0,0 +1,45 @@ > +/* PR tree-optimization/103660 */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fgimple -fdump-tree-forwprop4-raw" } */ > + > +#define funcs(OP,n) \ > +__GIMPLE() \ > +int min_##n(int a, int b) { \ > + _Bool X; \ > + _Bool Y; \ > + int t; \ > + int t1; \ > + int t2; \ > + X = a < b; \ > + Y = a >= b; \ > + t1 = X ? a : 0; \ > + t2 = Y ? b : 0; \ > + t = t1 OP t2; \ > + return t; \ > +} \ > +__GIMPLE() \ > +int f_##n(int a, int b, int c, \ > + int d) { \ > + _Bool X; \ > + _Bool Y; \ > + int t; \ > + int t1; \ > + int t2; \ > + X = a < b; \ > + Y = a >= b; \ > + t1 = X ? c : 0; \ > + t2 = Y ? d : 0; \ > + t = t1 OP t2; \ > + return t; \ > +} > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> */ > +/* f_xor/f_plus should produce (a < b) ? 
c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "min_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "lt_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "cond_expr, " 2 "forwprop4" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c > new file mode 100644 > index 00000000000..bd770b1b6d7 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c > @@ -0,0 +1,35 @@ > +/* PR tree-optimization/103660 */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop4-raw" } */ > + > +#define funcs(OP,n) \ > +int min_##n(int a, int b) { \ > + int t; \ > + int t1; \ > + int t2; \ > + t1 = (a < b) * a; \ > + t2 = (a >= b) * b; \ > + t = t1 OP t2; \ > + return t; \ > +} \ > +int f_##n(int a, int b, int c, \ > + int d) { \ > + int t; \ > + int t1; \ > + int t2; \ > + t1 = (a < b) * c; \ > + t2 = (a >= b) * d; \ > + t = t1 OP t2; \ > + return t; \ > +} > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> */ > +/* f_xor/f_plus should produce (a < b) ? c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "min_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "lt_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "cond_expr, " 2 "forwprop4" } } */ > -- > 2.43.0 >