On Mon, Mar 10, 2025 at 7:52 AM Konstantinos Eleftheriou
<[email protected]> wrote:
>
> Testcases for patterns `((a ^ b) & c) cmp d | a != b -> (0 cmp d | a != b)`
> and `(a ^ b) cmp c | a != b -> (0 cmp c | a != b)` were failing on some
> targets, like PowerPC.
>
> This patch moves the optimization to reassoc. Doing so, we can now handle
> cases where the related conditions appear in an AND expression too. Also,
> we can optimize cases where we have intermediate expressions between the
> related ones in the AND/OR expression on some targets. This is not handled
> on targets like PowerPC, where each condition of the AND/OR expression
> is placed into a different basic block.
>
> Bootstrapped/regtested on x86 and AArch64.
>
> PR tree-optimization/116860
>
> gcc/ChangeLog:
>
> * match.pd: Remove the following patterns:
> ((a ^ b) & c) cmp d | a != b -> (0 cmp d | a != b)
> (a ^ b) cmp c | a != b -> (0 cmp c | a != b)
I suspect removing them from match is wrong.
> * tree-ssa-reassoc.cc (INCLUDE_SET): Include <set> for std::set.
> (INCLUDE_ALGORITHM): Include <algorithm> for std::set_intersection.
> (solve_expr): New function.
> (find_terminal_nodes): New function.
> (get_terminal_nodes): New function.
> (optimize_cmp_xor_exprs): New function.
> (optimize_range_tests): Call optimize_cmp_xor_exprs.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/fold-xor-and-or.c: Renamed to fold-xor-and-or-1.c.
> * gcc.dg/tree-ssa/fold-xor-and-or-1.c:
> Add new test cases, remove logical-op-non-short-circuit=1.
> * gcc.dg/tree-ssa/fold-xor-or.c: Likewise.
> * gcc.dg/tree-ssa/fold-xor-and-or-2.c: New test.
> ---
> gcc/match.pd | 30 --
> ...{fold-xor-and-or.c => fold-xor-and-or-1.c} | 42 ++-
> .../gcc.dg/tree-ssa/fold-xor-and-or-2.c | 55 +++
> gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c | 42 ++-
> gcc/tree-ssa-reassoc.cc | 344 ++++++++++++++++++
> 5 files changed, 465 insertions(+), 48 deletions(-)
> rename gcc/testsuite/gcc.dg/tree-ssa/{fold-xor-and-or.c =>
> fold-xor-and-or-1.c} (50%)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5c679848bdf..b78ee6eaf4c 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3871,36 +3871,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (types_match (type, TREE_TYPE (@0)))
> (bit_xor @0 { build_one_cst (type); } ))))))
>
> -/* ((a ^ b) & c) cmp d || a != b --> (0 cmp d || a != b). */
> -(for cmp (simple_comparison)
> - (simplify
> - (bit_ior
> - (cmp:c
> - (bit_and:c
> - (bit_xor:c @0 @1)
> - tree_expr_nonzero_p@2)
> - @3)
> - (ne@4 @0 @1))
> - (bit_ior
> - (cmp
> - { build_zero_cst (TREE_TYPE (@0)); }
> - @3)
> - @4)))
> -
> -/* (a ^ b) cmp c || a != b --> (0 cmp c || a != b). */
> -(for cmp (simple_comparison)
> - (simplify
> - (bit_ior
> - (cmp:c
> - (bit_xor:c @0 @1)
> - @2)
> - (ne@3 @0 @1))
> - (bit_ior
> - (cmp
> - { build_zero_cst (TREE_TYPE (@0)); }
> - @2)
> - @3)))
> -
> /* We can't reassociate at all for saturating types. */
> (if (!TYPE_SATURATING (type))
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
> b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c
> similarity index 50%
> rename from gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
> rename to gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c
> index 99e83d8e5aa..23edf9f4342 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c
> @@ -1,55 +1,79 @@
> /* { dg-do compile } */
> -/* { dg-options "-O3 -fdump-tree-optimized --param
> logical-op-non-short-circuit=1" } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
>
> typedef unsigned long int uint64_t;
>
> -int cmp1(int d1, int d2) {
> +int cmp1_or(int d1, int d2) {
> if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2)
> return 0;
> return 1;
> }
>
> -int cmp2(int d1, int d2) {
> +int cmp2_or(int d1, int d2) {
> if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0)
> return 0;
> return 1;
> }
>
> -int cmp3(int d1, int d2) {
> +int cmp3_or(int d1, int d2) {
> if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1)
> return 0;
> return 1;
> }
>
> -int cmp4(int d1, int d2) {
> +int cmp4_or(int d1, int d2) {
> if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1)))
> return 0;
> return 1;
> }
>
> -int cmp1_64(uint64_t d1, uint64_t d2) {
> +int cmp1_and(int d1, int d2) {
> + if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_and(int d1, int d2) {
> + if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0))
> + return 0;
> + return 1;
> +}
> +
> +int cmp1_or_64(uint64_t d1, uint64_t d2) {
> if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2)
> return 0;
> return 1;
> }
>
> -int cmp2_64(uint64_t d1, uint64_t d2) {
> +int cmp2_or_64(uint64_t d1, uint64_t d2) {
> if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0)
> return 0;
> return 1;
> }
>
> -int cmp3_64(uint64_t d1, uint64_t d2) {
> +int cmp3_or_64(uint64_t d1, uint64_t d2) {
> if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1)
> return 0;
> return 1;
> }
>
> -int cmp4_64(uint64_t d1, uint64_t d2) {
> +int cmp4_or_64(uint64_t d1, uint64_t d2) {
> if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1)))
> return 0;
> return 1;
> }
>
> +int cmp1_and_64(uint64_t d1, uint64_t d2) {
> + if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_and_64(uint64_t d1, uint64_t d2) {
> + if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0))
> + return 0;
> + return 1;
> +}
> +
> /* The if should be removed, so the condition should not exist */
> /* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D."
> "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
> b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
> new file mode 100644
> index 00000000000..232adbd48d8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile { target { aarch64-*-* x86_64-*-* } } } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +
> +typedef unsigned long int uint64_t;
> +
> +int cmp1_or_inter(int d1, int d2, int d3) {
> + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_or_inter(int d1, int d2, int d3, int d4) {
> + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11)
> + return 0;
> + return 1;
> +}
> +
> +int cmp1_and_inter(int d1, int d2, int d3) {
> + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_and_inter(int d1, int d2, int d3, int d4) {
> + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11)
> + return 0;
> + return 1;
> +}
> +
> +int cmp1_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) {
> + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) {
> + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11)
> + return 0;
> + return 1;
> +}
> +
> +int cmp1_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) {
> + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) {
> + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11)
> + return 0;
> + return 1;
> +}
> +
> +/* The if should be removed, so the condition should not exist */
> +/* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D."
> "optimized" } } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
> b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
> index 51b7373af0d..5509cc67577 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
> @@ -1,55 +1,79 @@
> /* { dg-do compile } */
> -/* { dg-options "-O3 -fdump-tree-optimized --param
> logical-op-non-short-circuit=1" } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
>
> typedef unsigned long int uint64_t;
>
> -int cmp1(int d1, int d2) {
> +int cmp1_or(int d1, int d2) {
> if ((d1 ^ d2) == 0xabcd || d1 != d2)
> return 0;
> return 1;
> }
>
> -int cmp2(int d1, int d2) {
> +int cmp2_or(int d1, int d2) {
> if (d1 != d2 || (d1 ^ d2) == 0xabcd)
> return 0;
> return 1;
> }
>
> -int cmp3(int d1, int d2) {
> +int cmp3_or(int d1, int d2) {
> if (0xabcd > (d2 ^ d1) || d2 != d1)
> return 0;
> return 1;
> }
>
> -int cmp4(int d1, int d2) {
> +int cmp4_or(int d1, int d2) {
> if (d2 != d1 || 0xabcd > (d2 ^ d1))
> return 0;
> return 1;
> }
>
> -int cmp1_64(uint64_t d1, uint64_t d2) {
> +int cmp1_and(int d1, int d2) {
> + if (!((d1 ^ d2) == 0xabcd) && d1 == d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_and(int d1, int d2) {
> + if (d1 == d2 && !((d1 ^ d2) == 0xabcd))
> + return 0;
> + return 1;
> +}
> +
> +int cmp1_64_or(uint64_t d1, uint64_t d2) {
> if ((d1 ^ d2) == 0xabcd || d1 != d2)
> return 0;
> return 1;
> }
>
> -int cmp2_64(uint64_t d1, uint64_t d2) {
> +int cmp2_64_or(uint64_t d1, uint64_t d2) {
> if (d1 != d2 || (d1 ^ d2) == 0xabcd)
> return 0;
> return 1;
> }
>
> -int cmp3_64(uint64_t d1, uint64_t d2) {
> +int cmp3_64_or(uint64_t d1, uint64_t d2) {
> if (0xabcd > (d2 ^ d1) || d2 != d1)
> return 0;
> return 1;
> }
>
> -int cmp4_64(uint64_t d1, uint64_t d2) {
> +int cmp4_64_or(uint64_t d1, uint64_t d2) {
> if (d2 != d1 || 0xabcd > (d2 ^ d1))
> return 0;
> return 1;
> }
>
> +int cmp1_64_and(uint64_t d1, uint64_t d2) {
> + if (!((d1 ^ d2) == 0xabcd) && d1 == d2)
> + return 0;
> + return 1;
> +}
> +
> +int cmp2_64_and(uint64_t d1, uint64_t d2) {
> + if (d1 == d2 && !((d1 ^ d2) == 0xabcd))
> + return 0;
> + return 1;
> +}
> +
> /* The if should be removed, so the condition should not exist */
> /* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D."
> "optimized" } } */
> diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
> index 4017eea3413..3c15cc44560 100644
> --- a/gcc/tree-ssa-reassoc.cc
> +++ b/gcc/tree-ssa-reassoc.cc
> @@ -19,6 +19,8 @@ along with GCC; see the file COPYING3. If not see
> <http://www.gnu.org/licenses/>. */
>
> #include "config.h"
> +#define INCLUDE_SET
> +#define INCLUDE_ALGORITHM /* std::set_intersection. */
> #include "system.h"
> #include "coretypes.h"
> #include "backend.h"
> @@ -4077,6 +4079,347 @@ optimize_range_tests_var_bound (enum tree_code
> opcode, int first, int length,
> return any_changes;
> }
>
> +/* Helper function for optimize_cmp_xor_exprs. Visit EXPR operands
> + recursively and try to find comparison or XOR expressions that can be
> + solved using the expressions in CALC_STMTS. Expressions that can be
> folded
> + to 0 are stored in STMTS_TO_FOLD. IS_OR_EXPR is true for OR expressions
> + and false for AND expressions. */
> +
> +tree
> +solve_expr (tree expr, std::set<gimple *> *calc_stmts,
> + std::set<gimple *> *stmts_to_fold, std::set<tree> *visited,
> + bool is_or_expr)
> +{
> + /* Return, if have already visited this expression or the expression is not
> + an SSA name. */
> + if ((visited->find (expr) != visited->end ())
> + || (TREE_CODE (expr) != SSA_NAME))
> + return NULL_TREE;
> +
> + visited->insert (expr);
> +
> + gimple *def_stmt = SSA_NAME_DEF_STMT (expr);
> +
> + if (!def_stmt || !is_gimple_assign (def_stmt))
> + return expr;
> +
> + unsigned int op_num = gimple_num_ops (def_stmt);
> + unsigned int terminal_node_num = 0;
> + /* Visit the expression operands recursively until finding a statement that
> + all of its operands are terminal nodes. */
> + for (unsigned i = 1; i < op_num; ++i)
> + {
> + tree op = gimple_op (def_stmt, i);
> + if (!op) continue;
> + tree solve_result = solve_expr (op, calc_stmts, stmts_to_fold, visited,
> + is_or_expr);
> + if (solve_result == op)
> + terminal_node_num++;
> + }
> +
> + /* Check if all of the operands are terminal nodes. */
> + if (terminal_node_num != (op_num - 1))
> + return NULL_TREE;
> +
> + tree_code def_code = gimple_assign_rhs_code (def_stmt);
> + /* XOR and NE expressions are handled in a similar manner. */
> + if (def_code == BIT_XOR_EXPR)
> + def_code = NE_EXPR;
> +
> + tree def_lhs = gimple_assign_lhs (def_stmt);
> + tree def_op1 = gimple_assign_rhs1 (def_stmt);
> + tree def_op2 = gimple_assign_rhs2 (def_stmt);
> + tree def_op3 = gimple_assign_rhs3 (def_stmt);
> +
> + /* Find possible statements in calc_stmts that can solve the current
> + expression. We are looking for statements with the same operation and
> + the same operands as the current one in case of an OR expression, or
> + a statement using the inverse operation of the current one, with the
> same
> + operands, in case of an AND expression. */
> + for (gimple *stmt : *calc_stmts)
> + {
> + tree_code stmt_rhs_code = gimple_assign_rhs_code (stmt);
> + tree_code inverted_code = invert_tree_comparison (stmt_rhs_code,
> + HONOR_NANS (TREE_TYPE
> (expr)));
> + if (((is_or_expr && def_code == stmt_rhs_code)
> + || (!is_or_expr && def_code == inverted_code))
> + && gimple_assign_lhs (stmt) != def_lhs
> + && gimple_assign_rhs1 (stmt) == def_op1
> + && gimple_assign_rhs2 (stmt) == def_op2
> + && gimple_assign_rhs3 (stmt) == def_op3)
> + {
> + /* In case of an AND expression, where the related terms are in
> + different blocks, fold the term that is dominated by the
> + other. This ensures the correct handling of cases where
> + a related term may not be part of the AND expression, but
> + only happens to be inside the `if` statement's block. */
> + if (is_or_expr
> + || (gimple_bb (stmt) == gimple_bb (def_stmt))
> + || reassoc_stmt_dominates_stmt_p (stmt, def_stmt))
> + {
> + stmts_to_fold->insert (def_stmt);
> + calc_stmts->erase (def_stmt);
> + }
> + else if (reassoc_stmt_dominates_stmt_p (def_stmt, stmt))
> + {
> + stmts_to_fold->insert (stmt);
> + calc_stmts->erase (stmt);
> + }
> +
> + return expr;
> + }
> + }
> +
> + return NULL_TREE;
> +}
> +
> +/* Helper function for optimize_cmp_xor_exprs. Unfold EXPR and get the
> + terminal nodes in which it is analyzed. */
> +
> +void
> +find_terminal_nodes (tree expr, std::set<tree> *terminal_nodes,
> + std::set<tree> *visited)
Instead of std::set<tree> you most likely want to use hash_set<tree> instead.
> +{
> + if (visited->find (expr) != visited->end ())
> + return;
> +
> + visited->insert (expr);
> +
> + if (TREE_CODE (expr) != SSA_NAME)
> + {
> + terminal_nodes->insert (expr);
> + return;
> + }
> +
> + gimple *def_stmt = SSA_NAME_DEF_STMT (expr);
> +
> + if (is_gimple_debug (def_stmt))
> + return;
> +
> + if (!def_stmt || !is_gimple_assign (def_stmt))
> + {
> + terminal_nodes->insert (expr);
> + return;
> + }
> +
> + /* Visit the expression operands recursively. */
> + unsigned int op_num = gimple_num_ops (def_stmt);
> + for (unsigned i = 1; i < op_num; ++i)
> + {
> + tree op = gimple_op (def_stmt, i);
> + if (!op) continue;
> + find_terminal_nodes (op, terminal_nodes, visited);
> + }
> +}
> +
> +/* Helper function for optimize_cmp_xor_exprs. Check if the terminal nodes
> + for EXPR have been calculated before, otherwise find them. */
> +
> +std::set<tree>
> +get_terminal_nodes (tree expr, auto_vec<std::set<tree>> *terminal_nodes,
> + unsigned int index)
> +{
> + if (terminal_nodes->length () > index)
> + return (*terminal_nodes)[index];
> + else
> + {
> + std::set<tree> visited;
> + std::set<tree> expr_terminal_nodes;
> +
> + find_terminal_nodes (expr, &expr_terminal_nodes, &visited);
> + terminal_nodes->safe_push (expr_terminal_nodes);
> + return expr_terminal_nodes;
> + }
> +}
> +
> +/* Optimize boolean expressions containing comparisons or xor expressions and
> + the value of one term in the expression implies the value of another, like
> + the following:
> + ((d1 ^ d2) & 0xabcd) == 0 | d1 != d2 --> (0 & 0xabcd) == 0 | d1 != d2,
> + which will later be simplified to true.
> + (d1 ^ d2) == 0xabcd | d1 != d2 --> 0 == 0xabcd | d1 != d2,
> + which will later be simplified to d1 != d2.
> + ((d1 ^ d2) & 0xabcd) == 0 | d3 != 10 | d1 != d2 -->
> + (0 & 0xabcd) == 0 | d3 != 10 | d1 != d2,
> + which will later be simplified to true. */
> +
> +static bool
> +optimize_cmp_xor_exprs (tree_code opcode, vec<operand_entry *> *ops)
> +{
> + std::set<std::set<tree>> op_subexprsets;
> + std::set<tree> terms_in_preds;
> + bool is_or_expr = opcode == BIT_IOR_EXPR;
> + bool any_changes = false;
> +
> + if (!is_or_expr && (opcode != BIT_AND_EXPR))
> + return false;
> +
> + /* Iterate over the operands in the AND/OR expression and keep those that
> + are SSA names. */
> + std::set<tree> expr_terms;
> + for (operand_entry *oe : ops)
> + {
> + tree op = oe->op;
> + if (TREE_CODE (op) == SSA_NAME)
> + expr_terms.insert (op);
> + }
> +
> + /* Find related terms to the AND/OR expression in the current block's
> + predecessors. */
> + if (expr_terms.size () > 0)
> + {
> + edge e;
> + edge_iterator ei;
> + tree op = *(expr_terms.begin ());
> + gimple *op_def = SSA_NAME_DEF_STMT (op);
> + basic_block curr_bb;
> +
> + if (op_def && (curr_bb = gimple_bb (op_def)))
> + {
> + FOR_EACH_EDGE (e, ei, curr_bb->preds)
> + {
> + basic_block pred = e->src;
> + gimple_stmt_iterator gsi = gsi_last_bb (pred);
> + gimple *last_stmt = gsi_stmt (gsi);
> +
> + if (!last_stmt || (gimple_code (last_stmt) != GIMPLE_COND))
> + continue;
> +
> + tree_code cond_code = gimple_cond_code (last_stmt);
> + tree cond_lhs = gimple_cond_lhs (last_stmt);
> +
> + if ((cond_code == EQ_EXPR || cond_code == NE_EXPR)
> + && TREE_CODE (cond_lhs) == SSA_NAME
> + && (integer_zerop (gimple_cond_rhs (last_stmt)))
> + && (EDGE_COUNT (pred->succs) > 1))
> + {
> + edge t = EDGE_SUCC (pred, 0);
> + edge e = EDGE_SUCC (pred, 1);
> +
> + if (!(t->flags & EDGE_TRUE_VALUE))
> + std::swap (t, e);
> +
> + if ((is_or_expr && e->dest == curr_bb)
> + || (!is_or_expr && t->dest == curr_bb))
> + terms_in_preds.insert (cond_lhs);
> + }
> + }
> + }
> + }
> +
> + expr_terms.insert (terms_in_preds.begin (), terms_in_preds.end ());
> +
> + /* Initialize sets of related expressions. */
> + auto_vec<std::set<tree>> terminal_nodes;
> + unsigned int i = 0;
> + for (std::set<tree>::iterator it = expr_terms.begin ();
> + it != expr_terms.end (); ++it, ++i)
> + {
> + tree op = *it;
> + std::set<tree> related_terms = {op};
> +
> + std::set<tree> op_terminal_nodes = get_terminal_nodes (op,
> + &terminal_nodes, i);
> +
> + unsigned int j = i + 1;
> + for (std::set<tree>::iterator next_it = std::next (it);
> + next_it != expr_terms.end (); ++next_it, ++j)
> + {
> + tree next_op = *next_it;
> + std::set<tree> next_op_term_nodes = get_terminal_nodes (next_op,
> + &terminal_nodes, j);
> + std::set<tree> intersection;
> + std::set_intersection (op_terminal_nodes.begin (),
> + op_terminal_nodes.end (),
> + next_op_term_nodes.begin (),
> + next_op_term_nodes.end (),
> + std::inserter (intersection,
> + intersection.begin ()));
With hash_set, you can't use set_intersection since hash tables
iterators are not sorted. But you do your own intersection here.
Thanks,
Andrew Pinski
> + if (intersection.size () > 1)
> + related_terms.insert (next_op);
> + }
> +
> + op_subexprsets.insert (related_terms);
> + }
> +
> + /* Iterate over op_subexprsets, analyzing and trying to fold the
> expressions
> + in each set of related expressions until reaching a fixed-point. */
> +
> + auto_vec<tree> solved_exprs;
> + for (std::set<tree> expr_set : op_subexprsets)
> + {
> +
> + if (expr_set.size () < 2) continue;
> +
> + std::set<gimple *> calc_stmts;
> + std::set<gimple *> stmts_to_fold;
> + bool any_change;
> +
> + do {
> + any_change = false;
> + for (tree subexpr : expr_set)
> + {
> + gimple *def_stmt = SSA_NAME_DEF_STMT (subexpr);
> + if (!is_gimple_assign (def_stmt))
> + continue;
> +
> + /* If the expression's def is an EQ or NE expression, store it in
> + calc_stmts in order to use it to solve more complex
> + expressions. */
> + tree_code def_stmt_code = gimple_assign_rhs_code (def_stmt);
> + if ((def_stmt_code == EQ_EXPR || def_stmt_code == NE_EXPR)
> + && (calc_stmts.find (def_stmt) == calc_stmts.end ())
> + && (stmts_to_fold.find (def_stmt) == stmts_to_fold.end ()))
> + {
> + calc_stmts.insert (def_stmt);
> + any_change = true;
> + }
> + else
> + {
> + std::set<tree> visited;
> + if (solve_expr (subexpr, &calc_stmts, &stmts_to_fold,
> + &visited, is_or_expr))
> + solved_exprs.safe_push (subexpr);
> + }
> + }
> + } while (any_change);
> +
> + for (gimple *stmt : stmts_to_fold)
> + {
> + tree stmt_lhs = gimple_assign_lhs (stmt);
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + {
> + fprintf (dump_file, "Folding ");
> + print_generic_expr (dump_file, stmt_lhs);
> + fprintf (dump_file, " to 0\n");
> + }
> +
> + operand_entry *oe;
> + unsigned int i;
> + tree zero = build_zero_cst (TREE_TYPE (stmt_lhs));
> + FOR_EACH_VEC_ELT (*ops, i, oe)
> + {
> + if (oe->op == stmt_lhs)
> + {
> + oe->op = zero;
> + reassociate_stats.ops_eliminated += ops->length () - 1;
> + ops->truncate (0);
> + ops->quick_push (oe);
> + }
> + }
> +
> + gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
> + gsi_remove (&stmt_gsi, true);
> + replace_uses_by (stmt_lhs, zero);
> + release_defs (stmt);
> +
> + any_changes = true;
> + }
> + }
> +
> + return any_changes;
> +}
> +
> /* Optimize range tests, similarly how fold_range_test optimizes
> it on trees. The tree code for the binary
> operation between all the operands is OPCODE.
> @@ -4170,6 +4513,7 @@ optimize_range_tests (enum tree_code opcode,
> ranges, first_bb);
> any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
> ops, ranges);
> + any_changes |= optimize_cmp_xor_exprs (opcode, ops);
>
> if (any_changes && opcode != ERROR_MARK)
> {
> --
> 2.47.0
>