Testcases for match.pd patterns `((a ^ b) & c) cmp d | a != b -> (0 cmp d | a != b)` and `(a ^ b) cmp c | a != b -> (0 cmp c | a != b)` were failing on some targets, like PowerPC.
This patch adds an implemenetation for the optimization in reassoc. Doing so, we can now handle cases where the related conditions appear in an AND expression too. Also, we can optimize cases where we have intermediate expressions between the related ones in the AND/OR expression on some targets. This is not handled on targets like PowerPC, where each condition of the AND/OR expression is placed into a different basic block. Bootstrapped/regtested on x86 and AArch64. PR tree-optimization/116860 gcc/ChangeLog: * tree-ssa-reassoc.cc (solve_expr): New function. (find_terminal_nodes): New function. (get_terminal_nodes): New function. (optimize_cmp_xor_exprs): New function. (optimize_range_tests): Call optimize_cmp_xor_exprs. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/fold-xor-and-or.c: Remove logical-op-non-short-circuit=1. * gcc.dg/tree-ssa/fold-xor-or.c: Likewise. * gcc.dg/tree-ssa/fold-xor-and-or-2.c: New test. * gcc.dg/tree-ssa/fold-xor-and.c: New test. --- .../gcc.dg/tree-ssa/fold-xor-and-or-2.c | 59 +++ .../gcc.dg/tree-ssa/fold-xor-and-or.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and.c | 55 +++ gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c | 2 +- gcc/tree-ssa-reassoc.cc | 354 ++++++++++++++++++ 5 files changed, 470 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c new file mode 100644 index 00000000000..eea44d616b4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c @@ -0,0 +1,59 @@ +/* This test is not working across all targets (e.g. it fails on PowerPC, + because each condition of the AND/OR expression is placed into + a different basic block). Therefore, it is gated for x86-64 and AArch64, + where we know that it has to pass. */ +/* { dg-do compile { target { aarch64-*-* x86_64-*-* } } } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +typedef unsigned long int uint64_t; + +int cmp1_or_inter(int d1, int d2, int d3) { + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2) + return 0; + return 1; +} + +int cmp2_or_inter(int d1, int d2, int d3, int d4) { + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11) + return 0; + return 1; +} + +int cmp1_and_inter(int d1, int d2, int d3) { + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2) + return 0; + return 1; +} + +int cmp2_and_inter(int d1, int d2, int d3, int d4) { + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11) + return 0; + return 1; +} + +int cmp1_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) { + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2) + return 0; + return 1; +} + +int cmp2_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) { + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11) + return 0; + return 1; +} + +int cmp1_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) { + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2) + return 0; + return 1; +} + +int cmp2_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) { + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11) + return 0; + return 1; +} + +/* The if should be removed, so the condition should not exist */ +/* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." "optimized" } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c index 99e83d8e5aa..e5dc98e7541 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ typedef unsigned long int uint64_t; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and.c b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and.c new file mode 100644 index 00000000000..558ba888553 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and.c @@ -0,0 +1,55 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +typedef unsigned long int uint64_t; + +int cmp1(int d1, int d2) { + if (!((d1 ^ d2) == 0xabcd) && d1 == d2) + return 0; + return 1; +} + +int cmp2(int d1, int d2) { + if (d1 == d2 && !((d1 ^ d2) == 0xabcd)) + return 0; + return 1; +} + +int cmp3(int d1, int d2) { + if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2) + return 0; + return 1; +} + +int cmp4(int d1, int d2) { + if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0)) + return 0; + return 1; +} + +int cmp1_64(uint64_t d1, uint64_t d2) { + if (!((d1 ^ d2) == 0xabcd) && d1 == d2) + return 0; + return 1; +} + +int cmp2_64(uint64_t d1, uint64_t d2) { + if (d1 == d2 && !((d1 ^ d2) == 0xabcd)) + return 0; + return 1; +} + +int cmp3_64(uint64_t d1, uint64_t d2) { + if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2) + return 0; + return 1; +} + +int cmp4_64(uint64_t d1, uint64_t d2) { + if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0)) + return 0; + return 1; +} + +/* The if should be removed, so the condition should not exist */ +/* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." "optimized" } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c index 51b7373af0d..c55cfbcc84c 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ typedef unsigned long int uint64_t; diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc index 4017eea3413..c60ad9bb2fe 100644 --- a/gcc/tree-ssa-reassoc.cc +++ b/gcc/tree-ssa-reassoc.cc @@ -4077,6 +4077,359 @@ optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, return any_changes; } +/* Helper function for optimize_cmp_xor_exprs. Visit EXPR operands + recursively and try to find comparison or XOR expressions that can be + solved using the expressions in CALC_STMTS. Expressions that can be folded + to 0 are stored in STMTS_TO_FOLD. IS_OR_EXPR is true for OR expressions + and false for AND expressions. */ + +tree +solve_expr (tree expr, hash_set<gimple *> *calc_stmts, + hash_set<gimple *> *stmts_to_fold, hash_set<tree> *visited, + bool is_or_expr) +{ + /* Return, if have already visited this expression or the expression is not + an SSA name. */ + if (visited->contains (expr) || TREE_CODE (expr) != SSA_NAME) + return NULL_TREE; + + visited->add (expr); + + gimple *def_stmt = SSA_NAME_DEF_STMT (expr); + + if (!def_stmt || !is_gimple_assign (def_stmt)) + return expr; + + unsigned int op_num = gimple_num_ops (def_stmt); + unsigned int terminal_node_num = 0; + /* Visit the expression operands recursively until finding a statement that + all of its operands are terminal nodes. */ + for (unsigned i = 1; i < op_num; ++i) + { + tree op = gimple_op (def_stmt, i); + if (!op) + continue; + tree solve_result = solve_expr (op, calc_stmts, stmts_to_fold, visited, + is_or_expr); + if (solve_result == op) + terminal_node_num++; + } + + /* Check if all of the operands are terminal nodes. */ + if (terminal_node_num != op_num - 1) + return NULL_TREE; + + tree_code def_code = gimple_assign_rhs_code (def_stmt); + /* XOR and NE expressions are handled in a similar manner. */ + if (def_code == BIT_XOR_EXPR) + def_code = NE_EXPR; + + tree def_lhs = gimple_assign_lhs (def_stmt); + tree def_op1 = gimple_assign_rhs1 (def_stmt); + tree def_op2 = gimple_assign_rhs2 (def_stmt); + tree def_op3 = gimple_assign_rhs3 (def_stmt); + + /* Find possible statements in calc_stmts that can solve the current + expression. We are looking for statements with the same operation and + the same operands as the current one in case of an OR expression, or + a statement using the inverse operation of the current one, with the same + operands, in case of an AND expression. */ + for (gimple *stmt : *calc_stmts) + { + tree_code stmt_rhs_code = gimple_assign_rhs_code (stmt); + tree_code inverted_code + = invert_tree_comparison (stmt_rhs_code, + HONOR_NANS (TREE_TYPE (expr))); + if (((is_or_expr && def_code == stmt_rhs_code) + || (!is_or_expr && def_code == inverted_code)) + && gimple_assign_lhs (stmt) != def_lhs + && gimple_assign_rhs1 (stmt) == def_op1 + && gimple_assign_rhs2 (stmt) == def_op2 + && gimple_assign_rhs3 (stmt) == def_op3) + { + /* In case of an AND expression, where the related terms are in + different blocks, fold the term that is dominated by the + other. This ensures the correct handling of cases where + a related term may not be part of the AND expression, but + only happens to be inside the `if` statement's block. */ + if (is_or_expr + || gimple_bb (stmt) == gimple_bb (def_stmt) + || reassoc_stmt_dominates_stmt_p (stmt, def_stmt)) + { + stmts_to_fold->add (def_stmt); + calc_stmts->remove (def_stmt); + } + else if (reassoc_stmt_dominates_stmt_p (def_stmt, stmt)) + { + stmts_to_fold->add (stmt); + calc_stmts->remove (stmt); + } + + return expr; + } + } + + return NULL_TREE; +} + +/* Helper function for optimize_cmp_xor_exprs. Unfold EXPR and get the + terminal nodes in which it is analyzed. */ + +void +find_terminal_nodes (tree expr, hash_set<tree> *terminal_nodes, + hash_set<tree> *visited) +{ + if (visited->contains (expr)) + return; + + visited->add (expr); + + if (TREE_CODE (expr) != SSA_NAME) + { + terminal_nodes->add (expr); + return; + } + + gimple *def_stmt = SSA_NAME_DEF_STMT (expr); + + if (is_gimple_debug (def_stmt)) + return; + + if (!def_stmt || !is_gimple_assign (def_stmt)) + { + terminal_nodes->add (expr); + return; + } + + /* Visit the expression operands recursively. */ + unsigned int op_num = gimple_num_ops (def_stmt); + for (unsigned i = 1; i < op_num; ++i) + { + tree op = gimple_op (def_stmt, i); + if (!op) + continue; + find_terminal_nodes (op, terminal_nodes, visited); + } +} + +/* Helper function for optimize_cmp_xor_exprs. Check if the terminal nodes + for EXPR have been calculated before, otherwise find them. */ + +hash_set<tree> +get_terminal_nodes (tree expr, auto_vec<hash_set<tree>> *terminal_nodes, + unsigned int index) +{ + if (terminal_nodes->length () > index) + return (*terminal_nodes)[index]; + else + { + hash_set<tree> visited; + hash_set<tree> expr_terminal_nodes; + + find_terminal_nodes (expr, &expr_terminal_nodes, &visited); + terminal_nodes->safe_push (expr_terminal_nodes); + return expr_terminal_nodes; + } +} + +/* Optimize boolean expressions containing comparisons or xor expressions and + the value of one term in the expression implies the value of another, like + the following: + ((d1 ^ d2) & 0xabcd) == 0 | d1 != d2 --> (0 & 0xabcd) == 0 | d1 != d2, + which will later be simplified to true. + (d1 ^ d2) == 0xabcd | d1 != d2 --> 0 == 0xabcd | d1 != d2, + which will later be simplified to d1 != d2. + ((d1 ^ d2) & 0xabcd) == 0 | d3 != 10 | d1 != d2 --> + (0 & 0xabcd) == 0 | d3 != 10 | d1 != d2, + which will later be simplified to true. */ + +static bool +optimize_cmp_xor_exprs (tree_code opcode, vec<operand_entry *> *ops) +{ + auto_vec<hash_set<tree>> op_subexprsets; + hash_set<tree> terms_in_preds; + bool is_or_expr = opcode == BIT_IOR_EXPR; + bool any_changes = false; + + if (!is_or_expr && opcode != BIT_AND_EXPR) + return false; + + /* Iterate over the operands in the AND/OR expression and keep those that + are SSA names. */ + hash_set<tree> expr_terms; + for (operand_entry *oe : ops) + { + tree op = oe->op; + if (TREE_CODE (op) == SSA_NAME) + expr_terms.add (op); + } + + /* Find related terms to the AND/OR expression in the current block's + predecessors. */ + if (expr_terms.elements () > 0) + { + edge e; + edge_iterator ei; + tree op = *expr_terms.begin (); + gimple *op_def = SSA_NAME_DEF_STMT (op); + basic_block curr_bb; + + if (op_def && (curr_bb = gimple_bb (op_def))) + { + FOR_EACH_EDGE (e, ei, curr_bb->preds) + { + basic_block pred = e->src; + gimple_stmt_iterator gsi = gsi_last_bb (pred); + gimple *last_stmt = gsi_stmt (gsi); + + if (!last_stmt || gimple_code (last_stmt) != GIMPLE_COND) + continue; + + tree_code cond_code = gimple_cond_code (last_stmt); + tree cond_lhs = gimple_cond_lhs (last_stmt); + + if ((cond_code == EQ_EXPR || cond_code == NE_EXPR) + && TREE_CODE (cond_lhs) == SSA_NAME + && integer_zerop (gimple_cond_rhs (last_stmt)) + && EDGE_COUNT (pred->succs) > 1) + { + edge t = EDGE_SUCC (pred, 0); + edge e = EDGE_SUCC (pred, 1); + + if (!(t->flags & EDGE_TRUE_VALUE)) + std::swap (t, e); + + if ((is_or_expr && e->dest == curr_bb) + || (!is_or_expr && t->dest == curr_bb)) + terms_in_preds.add (cond_lhs); + } + } + } + } + + for (const tree &term : terms_in_preds) + expr_terms.add (term); + + /* Initialize sets of related expressions. */ + auto_vec<hash_set<tree>> terminal_nodes; + unsigned int i = 0; + for (hash_set<tree>::iterator it = expr_terms.begin (); + it != expr_terms.end (); ++it, ++i) + { + tree op = *it; + hash_set<tree> related_terms; + related_terms.add (op); + + hash_set<tree> op_terminal_nodes + = get_terminal_nodes (op, &terminal_nodes, i); + + /* Search the rest of the set for terms related to the current + one. */ + unsigned int j = i + 1; + hash_set<tree>::iterator next_it = it; + for (++next_it; next_it != expr_terms.end (); ++next_it, ++j) + { + tree next_op = *next_it; + hash_set<tree> next_op_term_nodes + = get_terminal_nodes (next_op, &terminal_nodes, j); + + /* If the terms have at least 2 common terminal nodes, add + next_op to the set of related terms. */ + unsigned int common_term_num = 0; + for (tree term_node : op_terminal_nodes) + { + if (next_op_term_nodes.contains (term_node)) + common_term_num++; + + if (common_term_num == 2) + { + related_terms.add (next_op); + break; + } + } + } + + op_subexprsets.safe_push (related_terms); + } + + /* Iterate over op_subexprsets, analyzing and trying to fold the expressions + in each set of related expressions until reaching a fixed-point. */ + + auto_vec<tree> solved_exprs; + for (hash_set<tree> expr_set : op_subexprsets) + { + if (expr_set.elements () < 2) + continue; + + hash_set<gimple *> calc_stmts; + hash_set<gimple *> stmts_to_fold; + bool any_change; + + do { + any_change = false; + for (tree subexpr : expr_set) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (subexpr); + if (!is_gimple_assign (def_stmt)) + continue; + + /* If the expression's def is an EQ or NE expression, store it in + calc_stmts in order to use it to solve more complex + expressions. */ + tree_code def_stmt_code = gimple_assign_rhs_code (def_stmt); + if ((def_stmt_code == EQ_EXPR || def_stmt_code == NE_EXPR) + && !calc_stmts.contains (def_stmt) + && !stmts_to_fold.contains (def_stmt)) + { + calc_stmts.add (def_stmt); + any_change = true; + } + else + { + hash_set<tree> visited; + if (solve_expr (subexpr, &calc_stmts, &stmts_to_fold, + &visited, is_or_expr)) + solved_exprs.safe_push (subexpr); + } + } + } while (any_change); + + for (gimple *stmt : stmts_to_fold) + { + tree stmt_lhs = gimple_assign_lhs (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Folding "); + print_generic_expr (dump_file, stmt_lhs); + fprintf (dump_file, " to 0\n"); + } + + operand_entry *oe; + unsigned int i; + tree zero = build_zero_cst (TREE_TYPE (stmt_lhs)); + FOR_EACH_VEC_ELT (*ops, i, oe) + { + if (oe->op == stmt_lhs) + { + oe->op = zero; + reassociate_stats.ops_eliminated += ops->length () - 1; + ops->truncate (0); + ops->quick_push (oe); + } + } + + gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt); + gsi_remove (&stmt_gsi, true); + replace_uses_by (stmt_lhs, zero); + release_defs (stmt); + + any_changes = true; + } + } + + return any_changes; +} + /* Optimize range tests, similarly how fold_range_test optimizes it on trees. The tree code for the binary operation between all the operands is OPCODE. @@ -4170,6 +4523,7 @@ optimize_range_tests (enum tree_code opcode, ranges, first_bb); any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, ops, ranges); + any_changes |= optimize_cmp_xor_exprs (opcode, ops); if (any_changes && opcode != ERROR_MARK) { -- 2.47.0