This adds the missing optimizations here. Note we don't need to match where CMP1 and CMP2 are complements of each other as that is already handled elsewhere.
I added a new executable testcase to make sure we optimize it correctly as I had originally messed up one of the entries for the resulting comparison to make sure they were 100% correct. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. PR tree-optimization/107881 gcc/ChangeLog: * match.pd (`(a CMP1 b) ^ (a CMP2 b)`): New pattern. (`(a CMP1 b) == (a CMP2 b)`): New pattern. gcc/testsuite/ChangeLog: * gcc.c-torture/execute/pr107881-1.c: New test. * gcc.dg/tree-ssa/cmpeq-4.c: New test. * gcc.dg/tree-ssa/cmpxor-1.c: New test. --- gcc/match.pd | 20 +++ .../gcc.c-torture/execute/pr107881-1.c | 115 ++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c | 51 ++++++++ gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c | 51 ++++++++ 4 files changed, 237 insertions(+) create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr107881-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c diff --git a/gcc/match.pd b/gcc/match.pd index e96e385c6fa..39c7ea1088f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3154,6 +3154,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { constant_boolean_node (true, type); }) )))))) +/* Optimize (a CMP b) ^ (a CMP b) */ +/* Optimize (a CMP b) != (a CMP b) */ +(for op (bit_xor ne) + (for cmp1 (lt lt lt le le le) + cmp2 (gt eq ne ge eq ne) + rcmp (ne le gt ne lt ge) + (simplify + (op:c (cmp1:c @0 @1) (cmp2:c @0 @1)) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) + (rcmp @0 @1))))) + +/* Optimize (a CMP b) == (a CMP b) */ +(for cmp1 (lt lt lt le le le) + cmp2 (gt eq ne ge eq ne) + rcmp (eq gt le eq ge lt) + (simplify + (eq:c (cmp1:c @0 @1) (cmp2:c @0 @1)) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) + (rcmp @0 @1)))) + /* We can't reassociate at all for saturating types. */ (if (!TYPE_SATURATING (type)) diff --git a/gcc/testsuite/gcc.c-torture/execute/pr107881-1.c b/gcc/testsuite/gcc.c-torture/execute/pr107881-1.c new file mode 100644 index 00000000000..063ec4c2797 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr107881-1.c @@ -0,0 +1,115 @@ +#define func(vol, op1, op2, op3) \ +_Bool op1##_##op2##_##op3##_##vol (int a, int b) \ +{ \ + vol _Bool x = op_##op1(a, b); \ + vol _Bool y = op_##op2(a, b); \ + return op_##op3(x, y); \ +} + +#define op_lt(a, b) ((a) < (b)) +#define op_le(a, b) ((a) <= (b)) +#define op_eq(a, b) ((a) == (b)) +#define op_ne(a, b) ((a) != (b)) +#define op_gt(a, b) ((a) > (b)) +#define op_ge(a, b) ((a) >= (b)) +#define op_xor(a, b) ((a) ^ (b)) + + +#define funcs(a) \ + a(lt,lt,ne) \ + a(lt,lt,eq) \ + a(lt,lt,xor) \ + a(lt,le,ne) \ + a(lt,le,eq) \ + a(lt,le,xor) \ + a(lt,gt,ne) \ + a(lt,gt,eq) \ + a(lt,gt,xor) \ + a(lt,ge,ne) \ + a(lt,ge,eq) \ + a(lt,ge,xor) \ + a(lt,eq,ne) \ + a(lt,eq,eq) \ + a(lt,eq,xor) \ + a(lt,ne,ne) \ + a(lt,ne,eq) \ + a(lt,ne,xor) \ + \ + a(le,lt,ne) \ + a(le,lt,eq) \ + a(le,lt,xor) \ + a(le,le,ne) \ + a(le,le,eq) \ + a(le,le,xor) \ + a(le,gt,ne) \ + a(le,gt,eq) \ + a(le,gt,xor) \ + a(le,ge,ne) \ + a(le,ge,eq) \ + a(le,ge,xor) \ + a(le,eq,ne) \ + a(le,eq,eq) \ + a(le,eq,xor) \ + a(le,ne,ne) \ + a(le,ne,eq) \ + a(le,ne,xor) \ + \ + a(gt,lt,ne) \ + a(gt,lt,eq) \ + a(gt,lt,xor) \ + a(gt,le,ne) \ + a(gt,le,eq) \ + a(gt,le,xor) \ + a(gt,gt,ne) \ + a(gt,gt,eq) \ + a(gt,gt,xor) \ + a(gt,ge,ne) \ + a(gt,ge,eq) \ + a(gt,ge,xor) \ + a(gt,eq,ne) \ + a(gt,eq,eq) \ + a(gt,eq,xor) \ + a(gt,ne,ne) \ + a(gt,ne,eq) \ + a(gt,ne,xor) \ + \ + a(ge,lt,ne) \ + a(ge,lt,eq) \ + a(ge,lt,xor) \ + a(ge,le,ne) \ + a(ge,le,eq) \ + a(ge,le,xor) \ + a(ge,gt,ne) \ + a(ge,gt,eq) \ + a(ge,gt,xor) \ + a(ge,ge,ne) \ + a(ge,ge,eq) \ + a(ge,ge,xor) \ + a(ge,eq,ne) \ + a(ge,eq,eq) \ + a(ge,eq,xor) \ + a(ge,ne,ne) \ + a(ge,ne,eq) \ + a(ge,ne,xor) + +#define funcs1(a,b,c) \ +func(,a,b,c) \ +func(volatile,a,b,c) + +funcs(funcs1) + +#define test(op1,op2,op3) \ +do { \ + if (op1##_##op2##_##op3##_(x,y) \ + != op1##_##op2##_##op3##_volatile(x,y)) \ + __builtin_abort(); \ +} while(0); + +int main() +{ + for(int x = -10; x < 10; x++) + for(int y = -10; y < 10; y++) + { + funcs(test) + } +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c new file mode 100644 index 00000000000..868d80fdcca --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-original" } */ +/* PR tree-optimization/107881 */ + +_Bool ltgt_eq(int a, int b) +{ + _Bool c = a < b; + _Bool d = a > b; + return c == d; // a == b +} +/* { dg-final { scan-tree-dump "a_\[0-9\]+.D. == b_\[0-9\]+.D.|b_\[0-9\]+.D. == a_\[0-9\]+.D." "optimized" } } */ + +_Bool lteq_eq(int x, int y) +{ + _Bool c = x < y; + _Bool d = x == y; + return c == d; // x > y +} +/* { dg-final { scan-tree-dump "x_\[0-9\]+.D. > y_\[0-9\]+.D.|y_\[0-9\]+.D. < x_\[0-9\]+.D." "optimized" } } */ + +_Bool ltne_eq(int z, int w) +{ + _Bool c = z < w; + _Bool d = z != w; + return c == d; // z <= w +} +/* { dg-final { scan-tree-dump "z_\[0-9\]+.D. <= w_\[0-9\]+.D.|w_\[0-9\]+.D. >= y_\[0-9\]+.D." "optimized" } } */ + +_Bool lege_eq(int i, int j) +{ + _Bool c = i <= j; + _Bool d = i >= j; + return c == d; // i == j +} +/* { dg-final { scan-tree-dump "i_\[0-9\]+.D. == j_\[0-9\]+.D.|j_\[0-9\]+.D. == i_\[0-9\]+.D." "optimized" } } */ + +_Bool leeq_eq(int k, int l) +{ + _Bool c = k <= l; + _Bool d = k == l; + return c == d; // k >= l +} +/* { dg-final { scan-tree-dump "k_\[0-9\]+.D. >= l_\[0-9\]+.D.|l_\[0-9\]+.D. <= k_\[0-9\]+.D." "optimized" } } */ + +_Bool lene_eq(int m, int n) +{ + _Bool c = m <= n; + _Bool d = m != n; + return c == d; // m < n +} +/* { dg-final { scan-tree-dump "m_\[0-9\]+.D. < n_\[0-9\]+.D.|n_\[0-9\]+.D. > m_\[0-9\]+.D." "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c new file mode 100644 index 00000000000..8de2d9d2244 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* PR tree-optimization/107881 */ + +_Bool ltgtxor(int a, int b) +{ + _Bool c = a < b; + _Bool d = a > b; + return c ^ d; // a != b +} +/* { dg-final { scan-tree-dump "a_\[0-9\]+.D. != b_\[0-9\]+.D.|b_\[0-9\]+.D. != a_\[0-9\]+.D." "optimized" } } */ + +_Bool lteqxor(int x, int y) +{ + _Bool c = x < y; + _Bool d = x == y; + return c ^ d; // x <= y (basically | here) +} +/* { dg-final { scan-tree-dump "x_\[0-9\]+.D. <= y_\[0-9\]+.D.|y_\[0-9\]+.D. >= x_\[0-9\]+.D." "optimized" } } */ + +_Bool ltnexor(int z, int w) +{ + _Bool c = z < w; + _Bool d = z != w; + return c ^ d; // z > w +} +/* { dg-final { scan-tree-dump "z_\[0-9\]+.D. > w_\[0-9\]+.D.|w_\[0-9\]+.D. < y_\[0-9\]+.D." "optimized" } } */ + +_Bool legexor(int i, int j) +{ + _Bool c = i <= j; + _Bool d = i >= j; + return c ^ d; // i != j +} +/* { dg-final { scan-tree-dump "i_\[0-9\]+.D. != j_\[0-9\]+.D.|j_\[0-9\]+.D. != i_\[0-9\]+.D." "optimized" } } */ + +_Bool leeqxor(int k, int l) +{ + _Bool c = k <= l; + _Bool d = k == l; + return c ^ d; // k < l +} +/* { dg-final { scan-tree-dump "k_\[0-9\]+.D. < l_\[0-9\]+.D.|l_\[0-9\]+.D. > k_\[0-9\]+.D." "optimized" } } */ + +_Bool lenexor(int m, int n) +{ + _Bool c = m <= n; + _Bool d = m != n; + return c ^ d; // m >= n +} +/* { dg-final { scan-tree-dump "m_\[0-9\]+.D. >= n_\[0-9\]+.D.|n_\[0-9\]+.D. <= m_\[0-9\]+.D." "optimized" } } */ -- 2.31.1