On Mon, Mar 10, 2025 at 7:52 AM Konstantinos Eleftheriou
<konstantinos.elefther...@vrull.eu> wrote:
>
> Testcases for patterns `((a ^ b) & c) cmp d | a != b -> (0 cmp d | a != b)`
> and `(a ^ b) cmp c | a != b -> (0 cmp c | a != b)` were failing on some
> targets, like PowerPC.
>
> This patch moves the optimization to reassoc. Doing so, we can now handle
> cases where the related conditions appear in an AND expression too. Also,
> we can optimize cases where we have intermediate expressions between the
> related ones in the AND/OR expression on some targets. This is not handled
> on targets like PowerPC, where each condition of the AND/OR expression
> is placed into a different basic block.
>
> Bootstrapped/regtested on x86 and AArch64.
>
>         PR tree-optimization/116860
>
> gcc/ChangeLog:
>
>         * match.pd: Remove the following patterns:
>         ((a ^ b) & c) cmp d | a != b -> (0 cmp d | a != b)
>         (a ^ b) cmp c | a != b -> (0 cmp c | a != b)

I suspect removing them from match is wrong.

>         * tree-ssa-reassoc.cc (INCLUDE_SET): Include <set> for std::set.
>         (INCLUDE_ALGORITHM): Include <algorithm> for std::set_intersection.
>         (solve_expr): New function.
>         (find_terminal_nodes): New function.
>         (get_terminal_nodes): New function.
>         (optimize_cmp_xor_exprs): New function.
>         (optimize_range_tests): Call optimize_cmp_xor_exprs.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/fold-xor-and-or.c: Renamed to fold-xor-and-or-1.c.
>         * gcc.dg/tree-ssa/fold-xor-and-or-1.c:
>         Add new test cases, remove logical-op-non-short-circuit=1.
>         * gcc.dg/tree-ssa/fold-xor-or.c: Likewise.
>         * gcc.dg/tree-ssa/fold-xor-and-or-2.c: New test.
> ---
>  gcc/match.pd                                  |  30 --
>  ...{fold-xor-and-or.c => fold-xor-and-or-1.c} |  42 ++-
>  .../gcc.dg/tree-ssa/fold-xor-and-or-2.c       |  55 +++
>  gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c   |  42 ++-
>  gcc/tree-ssa-reassoc.cc                       | 344 ++++++++++++++++++
>  5 files changed, 465 insertions(+), 48 deletions(-)
>  rename gcc/testsuite/gcc.dg/tree-ssa/{fold-xor-and-or.c => 
> fold-xor-and-or-1.c} (50%)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5c679848bdf..b78ee6eaf4c 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3871,36 +3871,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>      (if (types_match (type, TREE_TYPE (@0)))
>       (bit_xor @0 { build_one_cst (type); } ))))))
>
> -/* ((a ^ b) & c) cmp d || a != b --> (0 cmp d || a != b). */
> -(for cmp (simple_comparison)
> -  (simplify
> -    (bit_ior
> -      (cmp:c
> -       (bit_and:c
> -         (bit_xor:c @0 @1)
> -         tree_expr_nonzero_p@2)
> -       @3)
> -      (ne@4 @0 @1))
> -    (bit_ior
> -      (cmp
> -       { build_zero_cst (TREE_TYPE (@0)); }
> -       @3)
> -      @4)))
> -
> -/* (a ^ b) cmp c || a != b --> (0 cmp c || a != b). */
> -(for cmp (simple_comparison)
> -  (simplify
> -    (bit_ior
> -      (cmp:c
> -       (bit_xor:c @0 @1)
> -       @2)
> -      (ne@3 @0 @1))
> -    (bit_ior
> -      (cmp
> -       { build_zero_cst (TREE_TYPE (@0)); }
> -       @2)
> -      @3)))
> -
>  /* We can't reassociate at all for saturating types.  */
>  (if (!TYPE_SATURATING (type))
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c
> similarity index 50%
> rename from gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
> rename to gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c
> index 99e83d8e5aa..23edf9f4342 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c
> @@ -1,55 +1,79 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O3 -fdump-tree-optimized --param 
> logical-op-non-short-circuit=1" } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
>
>  typedef unsigned long int uint64_t;
>
> -int cmp1(int d1, int d2) {
> +int cmp1_or(int d1, int d2) {
>    if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2)
>      return 0;
>    return 1;
>  }
>
> -int cmp2(int d1, int d2) {
> +int cmp2_or(int d1, int d2) {
>    if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0)
>      return 0;
>    return 1;
>  }
>
> -int cmp3(int d1, int d2) {
> +int cmp3_or(int d1, int d2) {
>    if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1)
>      return 0;
>    return 1;
>  }
>
> -int cmp4(int d1, int d2) {
> +int cmp4_or(int d1, int d2) {
>    if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1)))
>      return 0;
>    return 1;
>  }
>
> -int cmp1_64(uint64_t d1, uint64_t d2) {
> +int cmp1_and(int d1, int d2) {
> +  if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_and(int d1, int d2) {
> +  if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0))
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp1_or_64(uint64_t d1, uint64_t d2) {
>    if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2)
>      return 0;
>    return 1;
>  }
>
> -int cmp2_64(uint64_t d1, uint64_t d2) {
> +int cmp2_or_64(uint64_t d1, uint64_t d2) {
>    if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0)
>      return 0;
>    return 1;
>  }
>
> -int cmp3_64(uint64_t d1, uint64_t d2) {
> +int cmp3_or_64(uint64_t d1, uint64_t d2) {
>    if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1)
>      return 0;
>    return 1;
>  }
>
> -int cmp4_64(uint64_t d1, uint64_t d2) {
> +int cmp4_or_64(uint64_t d1, uint64_t d2) {
>    if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1)))
>      return 0;
>    return 1;
>  }
>
> +int cmp1_and_64(uint64_t d1, uint64_t d2) {
> +  if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_and_64(uint64_t d1, uint64_t d2) {
> +  if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0))
> +    return 0;
> +  return 1;
> +}
> +
>  /* The if should be removed, so the condition should not exist */
>  /* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." 
> "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
> new file mode 100644
> index 00000000000..232adbd48d8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile { target { aarch64-*-* x86_64-*-* } } } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +
> +typedef unsigned long int uint64_t;
> +
> +int cmp1_or_inter(int d1, int d2, int d3) {
> +  if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_or_inter(int d1, int d2, int d3, int d4) {
> +  if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp1_and_inter(int d1, int d2, int d3) {
> +  if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_and_inter(int d1, int d2, int d3, int d4) {
> +  if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp1_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) {
> +  if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) {
> +  if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp1_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) {
> +  if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) {
> +  if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11)
> +    return 0;
> +  return 1;
> +}
> +
> +/* The if should be removed, so the condition should not exist */
> +/* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." 
> "optimized" } } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
> index 51b7373af0d..5509cc67577 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
> @@ -1,55 +1,79 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O3 -fdump-tree-optimized --param 
> logical-op-non-short-circuit=1" } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
>
>  typedef unsigned long int uint64_t;
>
> -int cmp1(int d1, int d2) {
> +int cmp1_or(int d1, int d2) {
>    if ((d1 ^ d2) == 0xabcd || d1 != d2)
>      return 0;
>    return 1;
>  }
>
> -int cmp2(int d1, int d2) {
> +int cmp2_or(int d1, int d2) {
>    if (d1 != d2 || (d1 ^ d2) == 0xabcd)
>      return 0;
>    return 1;
>  }
>
> -int cmp3(int d1, int d2) {
> +int cmp3_or(int d1, int d2) {
>    if (0xabcd > (d2 ^ d1) || d2 != d1)
>      return 0;
>    return 1;
>  }
>
> -int cmp4(int d1, int d2) {
> +int cmp4_or(int d1, int d2) {
>    if (d2 != d1 || 0xabcd > (d2 ^ d1))
>      return 0;
>    return 1;
>  }
>
> -int cmp1_64(uint64_t d1, uint64_t d2) {
> +int cmp1_and(int d1, int d2) {
> +  if (!((d1 ^ d2) == 0xabcd) && d1 == d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_and(int d1, int d2) {
> +  if (d1 == d2 && !((d1 ^ d2) == 0xabcd))
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp1_64_or(uint64_t d1, uint64_t d2) {
>    if ((d1 ^ d2) == 0xabcd || d1 != d2)
>      return 0;
>    return 1;
>  }
>
> -int cmp2_64(uint64_t d1, uint64_t d2) {
> +int cmp2_64_or(uint64_t d1, uint64_t d2) {
>    if (d1 != d2 || (d1 ^ d2) == 0xabcd)
>      return 0;
>    return 1;
>  }
>
> -int cmp3_64(uint64_t d1, uint64_t d2) {
> +int cmp3_64_or(uint64_t d1, uint64_t d2) {
>    if (0xabcd > (d2 ^ d1) || d2 != d1)
>      return 0;
>    return 1;
>  }
>
> -int cmp4_64(uint64_t d1, uint64_t d2) {
> +int cmp4_64_or(uint64_t d1, uint64_t d2) {
>    if (d2 != d1 || 0xabcd > (d2 ^ d1))
>      return 0;
>    return 1;
>  }
>
> +int cmp1_64_and(uint64_t d1, uint64_t d2) {
> +  if (!((d1 ^ d2) == 0xabcd) && d1 == d2)
> +    return 0;
> +  return 1;
> +}
> +
> +int cmp2_64_and(uint64_t d1, uint64_t d2) {
> +  if (d1 == d2 && !((d1 ^ d2) == 0xabcd))
> +    return 0;
> +  return 1;
> +}
> +
>  /* The if should be removed, so the condition should not exist */
>  /* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." 
> "optimized" } } */
> diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
> index 4017eea3413..3c15cc44560 100644
> --- a/gcc/tree-ssa-reassoc.cc
> +++ b/gcc/tree-ssa-reassoc.cc
> @@ -19,6 +19,8 @@ along with GCC; see the file COPYING3.  If not see
>  <http://www.gnu.org/licenses/>.  */
>
>  #include "config.h"
> +#define INCLUDE_SET
> +#define INCLUDE_ALGORITHM /* std::set_intersection.  */
>  #include "system.h"
>  #include "coretypes.h"
>  #include "backend.h"
> @@ -4077,6 +4079,347 @@ optimize_range_tests_var_bound (enum tree_code 
> opcode, int first, int length,
>    return any_changes;
>  }
>
> +/* Helper function for optimize_cmp_xor_exprs.  Visit EXPR operands
> +   recursively and try to find comparison or XOR expressions that can be
> +   solved using the expressions in CALC_STMTS.  Expressions that can be 
> folded
> +   to 0 are stored in STMTS_TO_FOLD.  IS_OR_EXPR is true for OR expressions
> +   and false for AND expressions.  */
> +
> +tree
> +solve_expr (tree expr, std::set<gimple *> *calc_stmts,
> +           std::set<gimple *> *stmts_to_fold, std::set<tree> *visited,
> +           bool is_or_expr)
> +{
> +  /* Return, if have already visited this expression or the expression is not
> +     an SSA name.  */
> +  if ((visited->find (expr) != visited->end ())
> +      || (TREE_CODE (expr) != SSA_NAME))
> +    return NULL_TREE;
> +
> +  visited->insert (expr);
> +
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (expr);
> +
> +  if (!def_stmt || !is_gimple_assign (def_stmt))
> +    return expr;
> +
> +  unsigned int op_num = gimple_num_ops (def_stmt);
> +  unsigned int terminal_node_num = 0;
> +  /* Visit the expression operands recursively until finding a statement that
> +     all of its operands are terminal nodes.  */
> +  for (unsigned i = 1; i < op_num; ++i)
> +    {
> +      tree op = gimple_op (def_stmt, i);
> +      if (!op) continue;
> +      tree solve_result = solve_expr (op, calc_stmts, stmts_to_fold, visited,
> +                                     is_or_expr);
> +      if (solve_result == op)
> +       terminal_node_num++;
> +    }
> +
> +  /* Check if all of the operands are terminal nodes.  */
> +  if (terminal_node_num != (op_num - 1))
> +    return NULL_TREE;
> +
> +  tree_code def_code = gimple_assign_rhs_code (def_stmt);
> +  /* XOR and NE expressions are handled in a similar manner.  */
> +  if (def_code == BIT_XOR_EXPR)
> +    def_code = NE_EXPR;
> +
> +  tree def_lhs = gimple_assign_lhs (def_stmt);
> +  tree def_op1 = gimple_assign_rhs1 (def_stmt);
> +  tree def_op2 = gimple_assign_rhs2 (def_stmt);
> +  tree def_op3 = gimple_assign_rhs3 (def_stmt);
> +
> +  /* Find possible statements in calc_stmts that can solve the current
> +     expression.  We are looking for statements with the same operation and
> +     the same operands as the current one in case of an OR expression, or
> +     a statement using the inverse operation of the current one, with the 
> same
> +     operands, in case of an AND expression.  */
> +  for (gimple *stmt : *calc_stmts)
> +    {
> +      tree_code stmt_rhs_code = gimple_assign_rhs_code (stmt);
> +      tree_code inverted_code = invert_tree_comparison (stmt_rhs_code,
> +                                               HONOR_NANS (TREE_TYPE 
> (expr)));
> +      if (((is_or_expr && def_code == stmt_rhs_code)
> +         || (!is_or_expr && def_code == inverted_code))
> +         && gimple_assign_lhs (stmt) != def_lhs
> +         && gimple_assign_rhs1 (stmt) == def_op1
> +         && gimple_assign_rhs2 (stmt) == def_op2
> +         && gimple_assign_rhs3 (stmt) == def_op3)
> +       {
> +         /* In case of an AND expression, where the related terms are in
> +            different blocks, fold the term that is dominated by the
> +            other.  This ensures the correct handling of cases where
> +            a related term may not be part of the AND expression, but
> +            only happens to be inside the `if` statement's block.  */
> +         if (is_or_expr
> +             || (gimple_bb (stmt) == gimple_bb (def_stmt))
> +             || reassoc_stmt_dominates_stmt_p (stmt, def_stmt))
> +           {
> +             stmts_to_fold->insert (def_stmt);
> +             calc_stmts->erase (def_stmt);
> +           }
> +         else if (reassoc_stmt_dominates_stmt_p (def_stmt, stmt))
> +           {
> +             stmts_to_fold->insert (stmt);
> +             calc_stmts->erase (stmt);
> +           }
> +
> +         return expr;
> +       }
> +    }
> +
> +  return NULL_TREE;
> +}
> +
> +/* Helper function for optimize_cmp_xor_exprs.  Unfold EXPR and get the
> +   terminal nodes in which it is analyzed.  */
> +
> +void
> +find_terminal_nodes (tree expr, std::set<tree> *terminal_nodes,
> +                   std::set<tree> *visited)

Instead of std::set<tree> you most likely want to use hash_set<tree> instead.

> +{
> +  if (visited->find (expr) != visited->end ())
> +    return;
> +
> +  visited->insert (expr);
> +
> +  if (TREE_CODE (expr) != SSA_NAME)
> +    {
> +      terminal_nodes->insert (expr);
> +      return;
> +    }
> +
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (expr);
> +
> +  if (is_gimple_debug (def_stmt))
> +    return;
> +
> +  if (!def_stmt || !is_gimple_assign (def_stmt))
> +    {
> +      terminal_nodes->insert (expr);
> +      return;
> +    }
> +
> +  /* Visit the expression operands recursively.  */
> +  unsigned int op_num = gimple_num_ops (def_stmt);
> +  for (unsigned i = 1; i < op_num; ++i)
> +    {
> +      tree op = gimple_op (def_stmt, i);
> +      if (!op) continue;
> +      find_terminal_nodes (op, terminal_nodes, visited);
> +    }
> +}
> +
> +/* Helper function for optimize_cmp_xor_exprs.  Check if the terminal nodes
> +   for EXPR have been calculated before, otherwise find them.  */
> +
> +std::set<tree>
> +get_terminal_nodes (tree expr, auto_vec<std::set<tree>> *terminal_nodes,
> +                   unsigned int index)
> +{
> +  if (terminal_nodes->length () > index)
> +    return (*terminal_nodes)[index];
> +  else
> +    {
> +      std::set<tree> visited;
> +      std::set<tree> expr_terminal_nodes;
> +
> +      find_terminal_nodes (expr, &expr_terminal_nodes, &visited);
> +      terminal_nodes->safe_push (expr_terminal_nodes);
> +      return expr_terminal_nodes;
> +    }
> +}
> +
> +/* Optimize boolean expressions containing comparisons or xor expressions and
> +   the value of one term in the expression implies the value of another, like
> +   the following:
> +   ((d1 ^ d2) & 0xabcd) == 0 | d1 != d2 --> (0 & 0xabcd) == 0 | d1 != d2,
> +   which will later be simplified to true.
> +   (d1 ^ d2) == 0xabcd | d1 != d2 --> 0 == 0xabcd | d1 != d2,
> +   which will later be simplified to d1 != d2.
> +   ((d1 ^ d2) & 0xabcd) == 0 | d3 != 10 | d1 != d2 -->
> +     (0 & 0xabcd) == 0 | d3 != 10 | d1 != d2,
> +    which will later be simplified to true.  */
> +
> +static bool
> +optimize_cmp_xor_exprs (tree_code opcode, vec<operand_entry *> *ops)
> +{
> +  std::set<std::set<tree>> op_subexprsets;
> +  std::set<tree> terms_in_preds;
> +  bool is_or_expr = opcode == BIT_IOR_EXPR;
> +  bool any_changes = false;
> +
> +  if (!is_or_expr && (opcode != BIT_AND_EXPR))
> +    return false;
> +
> +  /* Iterate over the operands in the AND/OR expression and keep those that
> +     are SSA names.  */
> +  std::set<tree> expr_terms;
> +  for (operand_entry *oe : ops)
> +    {
> +      tree op = oe->op;
> +      if (TREE_CODE (op) == SSA_NAME)
> +       expr_terms.insert (op);
> +    }
> +
> +  /* Find related terms to the AND/OR expression in the current block's
> +     predecessors.  */
> +  if (expr_terms.size () > 0)
> +    {
> +      edge e;
> +      edge_iterator ei;
> +      tree op = *(expr_terms.begin ());
> +      gimple *op_def = SSA_NAME_DEF_STMT (op);
> +      basic_block curr_bb;
> +
> +      if (op_def && (curr_bb = gimple_bb (op_def)))
> +       {
> +         FOR_EACH_EDGE (e, ei, curr_bb->preds)
> +           {
> +             basic_block pred = e->src;
> +             gimple_stmt_iterator gsi = gsi_last_bb (pred);
> +             gimple *last_stmt = gsi_stmt (gsi);
> +
> +             if (!last_stmt || (gimple_code (last_stmt) != GIMPLE_COND))
> +               continue;
> +
> +             tree_code cond_code = gimple_cond_code (last_stmt);
> +             tree cond_lhs = gimple_cond_lhs (last_stmt);
> +
> +             if ((cond_code == EQ_EXPR || cond_code == NE_EXPR)
> +                 && TREE_CODE (cond_lhs) == SSA_NAME
> +                 && (integer_zerop (gimple_cond_rhs (last_stmt)))
> +                 && (EDGE_COUNT (pred->succs) > 1))
> +               {
> +                 edge t = EDGE_SUCC (pred, 0);
> +                 edge e = EDGE_SUCC (pred, 1);
> +
> +                 if (!(t->flags & EDGE_TRUE_VALUE))
> +                   std::swap (t, e);
> +
> +                 if ((is_or_expr && e->dest == curr_bb)
> +                     || (!is_or_expr && t->dest == curr_bb))
> +                   terms_in_preds.insert (cond_lhs);
> +               }
> +           }
> +       }
> +    }
> +
> +    expr_terms.insert (terms_in_preds.begin (), terms_in_preds.end ());
> +
> +    /* Initialize sets of related expressions.  */
> +    auto_vec<std::set<tree>> terminal_nodes;
> +    unsigned int i = 0;
> +    for (std::set<tree>::iterator it = expr_terms.begin ();
> +        it != expr_terms.end (); ++it, ++i)
> +      {
> +       tree op = *it;
> +       std::set<tree> related_terms = {op};
> +
> +       std::set<tree> op_terminal_nodes = get_terminal_nodes (op,
> +                                                       &terminal_nodes, i);
> +
> +       unsigned int j = i + 1;
> +       for (std::set<tree>::iterator next_it = std::next (it);
> +            next_it != expr_terms.end (); ++next_it, ++j)
> +         {
> +           tree next_op = *next_it;
> +           std::set<tree> next_op_term_nodes = get_terminal_nodes (next_op,
> +                                                       &terminal_nodes, j);
> +           std::set<tree> intersection;
> +           std::set_intersection (op_terminal_nodes.begin (),
> +                                  op_terminal_nodes.end (),
> +                                  next_op_term_nodes.begin (),
> +                                  next_op_term_nodes.end (),
> +                                  std::inserter (intersection,
> +                                                 intersection.begin ()));

With hash_set, you can't use set_intersection since hash tables
iterators are not sorted. But you do your own intersection here.

Thanks,
Andrew Pinski

> +           if (intersection.size () > 1)
> +             related_terms.insert (next_op);
> +         }
> +
> +       op_subexprsets.insert (related_terms);
> +      }
> +
> +  /* Iterate over op_subexprsets, analyzing and trying to fold the 
> expressions
> +     in each set of related expressions until reaching a fixed-point.  */
> +
> +  auto_vec<tree> solved_exprs;
> +  for (std::set<tree> expr_set : op_subexprsets)
> +    {
> +
> +      if (expr_set.size () < 2) continue;
> +
> +      std::set<gimple *> calc_stmts;
> +      std::set<gimple *> stmts_to_fold;
> +      bool any_change;
> +
> +      do {
> +       any_change = false;
> +       for (tree subexpr : expr_set)
> +         {
> +           gimple *def_stmt = SSA_NAME_DEF_STMT (subexpr);
> +           if (!is_gimple_assign (def_stmt))
> +             continue;
> +
> +           /* If the expression's def is an EQ or NE expression, store it in
> +              calc_stmts in order to use it to solve more complex
> +              expressions.  */
> +           tree_code def_stmt_code = gimple_assign_rhs_code (def_stmt);
> +           if ((def_stmt_code == EQ_EXPR || def_stmt_code == NE_EXPR)
> +               && (calc_stmts.find (def_stmt) == calc_stmts.end ())
> +               && (stmts_to_fold.find (def_stmt) == stmts_to_fold.end ()))
> +             {
> +               calc_stmts.insert (def_stmt);
> +               any_change = true;
> +             }
> +           else
> +             {
> +               std::set<tree> visited;
> +               if (solve_expr (subexpr, &calc_stmts, &stmts_to_fold,
> +                               &visited, is_or_expr))
> +                 solved_exprs.safe_push (subexpr);
> +             }
> +         }
> +      } while (any_change);
> +
> +      for (gimple *stmt : stmts_to_fold)
> +       {
> +         tree stmt_lhs = gimple_assign_lhs (stmt);
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           {
> +               fprintf (dump_file, "Folding ");
> +               print_generic_expr (dump_file, stmt_lhs);
> +               fprintf (dump_file, " to 0\n");
> +           }
> +
> +         operand_entry *oe;
> +         unsigned int i;
> +         tree zero = build_zero_cst (TREE_TYPE (stmt_lhs));
> +         FOR_EACH_VEC_ELT (*ops, i, oe)
> +           {
> +             if (oe->op == stmt_lhs)
> +               {
> +                 oe->op = zero;
> +                 reassociate_stats.ops_eliminated += ops->length () - 1;
> +                 ops->truncate (0);
> +                 ops->quick_push (oe);
> +               }
> +           }
> +
> +         gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
> +         gsi_remove (&stmt_gsi, true);
> +         replace_uses_by (stmt_lhs, zero);
> +         release_defs (stmt);
> +
> +         any_changes = true;
> +       }
> +    }
> +
> +    return any_changes;
> +}
> +
>  /* Optimize range tests, similarly how fold_range_test optimizes
>     it on trees.  The tree code for the binary
>     operation between all the operands is OPCODE.
> @@ -4170,6 +4513,7 @@ optimize_range_tests (enum tree_code opcode,
>                                                  ranges, first_bb);
>    any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
>                                                    ops, ranges);
> +  any_changes |= optimize_cmp_xor_exprs (opcode, ops);
>
>    if (any_changes && opcode != ERROR_MARK)
>      {
> --
> 2.47.0
>

Reply via email to