This brings FRE/PRE up to the same level as DOM in being able to remove redundant conditionals. It does so by inserting temporary conditional expressions proved to be true on single predecessor edges.
I've had to do a lot of testcase adjustments, thus the patch is now re-bootstrapping / testing on x86_64-unknown-linux-gnu. Richard. 2015-08-12 Richard Biener <rguent...@suse.de> * tree-ssa-sccvn.c (vn_nary_op_compute_hash): Also canonicalize comparison operand order and commutative ternary op operand order. (sccvn_dom_walker::cond_stack): New state to track temporary expressions. (sccvn_dom_walker::after_dom_children): Remove tempoary expressions no longer valid. (sccvn_dom_walker::record_cond): Add a single temporary conditional expression. (sccvn_dom_walker::record_conds): Add a temporary conditional expressions and all related expressions also true/false. (sccvn_dom_walker::before_dom_children): Record temporary expressions based on the controlling condition of a single predecessor. When trying to simplify a conditional statement lookup expressions we might have inserted earlier. * testsuite/g++.dg/tree-ssa/pr61034.C * testsuite/gcc.dg/fold-compare-2.c * testsuite/gcc.dg/pr50763.c * testsuite/gcc.dg/predict-3.c * testsuite/gcc.dg/tree-ssa/20030709-2.c * testsuite/gcc.dg/tree-ssa/pr19831-3.c * testsuite/gcc.dg/tree-ssa/pr20657.c * testsuite/gcc.dg/tree-ssa/pr21001.c * testsuite/gcc.dg/tree-ssa/pr37508.c * testsuite/gcc.dg/tree-ssa/ssa-fre-47.c * testsuite/gcc.dg/tree-ssa/ssa-fre-48.c * testsuite/gcc.dg/tree-ssa/ssa-fre-49.c * testsuite/gcc.dg/tree-ssa/vrp04.c * testsuite/gcc.dg/tree-ssa/vrp07.c * testsuite/gcc.dg/tree-ssa/vrp09.c * testsuite/gcc.dg/tree-ssa/vrp16.c * testsuite/gcc.dg/tree-ssa/vrp20.c * testsuite/gcc.dg/tree-ssa/vrp25.c * testsuite/gcc.dg/tree-ssa/vrp87.c Index: gcc/testsuite/g++.dg/tree-ssa/pr61034.C =================================================================== --- gcc/testsuite/g++.dg/tree-ssa/pr61034.C (revision 226802) +++ gcc/testsuite/g++.dg/tree-ssa/pr61034.C (working copy) @@ -43,5 +43,5 @@ bool f(I a, I b, I c, I d) { // This works only if everything is inlined into 'f'. // { dg-final { scan-tree-dump-times ";; Function" 1 "fre2" } } -// { dg-final { scan-tree-dump-times "free" 18 "fre2" } } +// { dg-final { scan-tree-dump-times "free" 10 "fre2" } } // { dg-final { scan-tree-dump-times "unreachable" 11 "fre2" } } Index: gcc/testsuite/gcc.dg/fold-compare-2.c =================================================================== --- gcc/testsuite/gcc.dg/fold-compare-2.c (revision 226802) +++ gcc/testsuite/gcc.dg/fold-compare-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-tail-merge -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ extern void abort (void); @@ -15,5 +15,5 @@ main(void) return 0; } -/* { dg-final { scan-tree-dump-times "Removing basic block" 2 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "Removing basic block" 2 "fre1" } } */ Index: gcc/testsuite/gcc.dg/pr50763.c =================================================================== --- gcc/testsuite/gcc.dg/pr50763.c (revision 226802) +++ gcc/testsuite/gcc.dg/pr50763.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-tail-merge -fno-tree-dominator-opts -fdump-tree-pre" } */ +/* { dg-options "-O2 -ftree-tail-merge -fno-tree-dominator-opts" } */ int bar (int i); @@ -11,5 +11,3 @@ foo (int c, int d) d = 33; while (c == d); } - -/* { dg-final { scan-tree-dump-times "== 33" 2 "pre"} } */ Index: gcc/testsuite/gcc.dg/predict-3.c =================================================================== --- gcc/testsuite/gcc.dg/predict-3.c (revision 226802) +++ gcc/testsuite/gcc.dg/predict-3.c (working copy) @@ -12,6 +12,10 @@ void foo (int bound) { if (i < bound - 2) global += bar (i); + /* The following test is redundant with the loop bound check in the + for stmt and thus eliminated by FRE which makes the controlled + stmt always executed and thus equivalent to 100%. Thus the + heuristic only applies three times. */ if (i <= bound) global += bar (i); if (i + 1 < bound) @@ -21,4 +25,4 @@ void foo (int bound) } } -/* { dg-final { scan-tree-dump-times "loop iv compare heuristics: 100.0%" 4 "profile_estimate"} } */ +/* { dg-final { scan-tree-dump-times "loop iv compare heuristics: 100.0%" 3 "profile_estimate"} } */ Index: gcc/testsuite/gcc.dg/tree-ssa/20030709-2.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/20030709-2.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/20030709-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cddce2" } */ +/* { dg-options "-O -fdump-tree-dce2" } */ struct rtx_def; typedef struct rtx_def *rtx; @@ -42,13 +42,13 @@ get_alias_set (t) /* There should be precisely one load of ->decl.rtl. If there is more than, then the dominator optimizations failed. */ -/* { dg-final { scan-tree-dump-times "->decl\\.rtl" 1 "cddce2"} } */ +/* { dg-final { scan-tree-dump-times "->decl\\.rtl" 1 "dce2"} } */ /* There should be no loads of .rtmem since the complex return statement is just "return 0". */ -/* { dg-final { scan-tree-dump-times ".rtmem" 0 "cddce2"} } */ +/* { dg-final { scan-tree-dump-times ".rtmem" 0 "dce2"} } */ /* There should be one IF statement (the complex return statement should collapse down to a simple return 0 without any conditionals). */ -/* { dg-final { scan-tree-dump-times "if " 1 "cddce2"} } */ +/* { dg-final { scan-tree-dump-times "if " 1 "dce2"} } */ Index: gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ void test2(void) { Index: gcc/testsuite/gcc.dg/tree-ssa/pr20657.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr20657.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr20657.c (working copy) @@ -3,7 +3,7 @@ statement, which was needed to eliminate the second "if" statement. */ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-dominator-opts -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-fre -fdump-tree-vrp1-details" } */ int foo (int a) Index: gcc/testsuite/gcc.dg/tree-ssa/pr21001.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr21001.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr21001.c (working copy) @@ -5,7 +5,7 @@ range information out of the conditional. */ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-dominator-opts -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-fre -fdump-tree-vrp1-details" } */ int foo (int a) Index: gcc/testsuite/gcc.dg/tree-ssa/pr37508.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr37508.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr37508.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1" } */ struct foo1 { int i:1; @@ -10,9 +10,10 @@ struct foo2 { int test1 (struct foo1 *x) { - if (x->i == 0) + int i = x->i; + if (i == 0) return 1; - else if (x->i == -1) + else if (i == -1) return 1; return 0; } @@ -37,9 +38,10 @@ int test3 (struct foo1 *x) int test4 (struct foo2 *x) { - if (x->i == 0) + unsigned int i = x->i; + if (i == 0) return 1; - else if (x->i == 1) + else if (i == 1) return 1; return 0; } Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-47.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-47.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-47.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +int foo (int i) +{ + if (i) + { + if (i) + return 0; + else + return 1; + } + return 0; +} + +/* { dg-final { scan-tree-dump "return 0;" "fre1" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-48.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-48.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-48.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1-details" } */ + +int foo (int i) +{ + if (i) + { + if (i) + return 1; + else + return 0; + } + return 0; +} + +/* { dg-final { scan-tree-dump "Removing unexecutable edge" "fre1" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-49.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-49.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-49.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +int foo (int i, int j) +{ + if (i < j) + { + if (i <= j) + return j > i; + else + return 0; + } + return 1; +} + +/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/vrp04.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp04.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp04.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1" } */ int foo (int a, int b) Index: gcc/testsuite/gcc.dg/tree-ssa/vrp07.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp07.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp07.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1-details -fdelete-null-pointer-checks" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1-details -fdelete-null-pointer-checks" } */ int foo (int i, int *p) Index: gcc/testsuite/gcc.dg/tree-ssa/vrp09.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp09.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp09.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1 -std=gnu89" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1 -std=gnu89" } */ foo (int *p) { Index: gcc/testsuite/gcc.dg/tree-ssa/vrp16.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp16.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp16.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1-details" } */ extern void abort (void) __attribute__ ((__noreturn__)); @@ -12,9 +12,10 @@ struct rtx_def int nonlocal_mentioned_p (rtx x) { - if (x->code == 6 || x->code == 7) - if (x->code == 7) - if (x->code != 7) + int code = x->code; + if (code == 6 || code == 7) + if (code == 7) + if (code != 7) abort (); } Index: gcc/testsuite/gcc.dg/tree-ssa/vrp20.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp20.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp20.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fwrapv -O1 -ftree-vrp -fdump-tree-vrp1" } */ +/* { dg-options "-fwrapv -O1 -fno-tree-fre -ftree-vrp -fdump-tree-vrp1" } */ extern void abort (); extern void exit (int); Index: gcc/testsuite/gcc.dg/tree-ssa/vrp25.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp25.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp25.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1-details" } */ extern void abort (); extern void arf (); Index: gcc/testsuite/gcc.dg/tree-ssa/vrp87.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp87.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp87.c (working copy) @@ -1,8 +1,8 @@ /* Setting LOGICAL_OP_NON_SHORT_CIRCUIT to 0 leads to two conditional jumps - when evaluating an && condition. VRP is not able to optimize this. */ + when evaluating an && condition. */ /* { dg-do compile { target { ! { logical_op_short_circuit || { m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* xtensa*-*-* hppa*-*-* } } } } } */ -/* { dg-options "-O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details" } */ +/* { dg-options "-O2 -fdump-tree-fre1-details" } */ struct bitmap_head_def; typedef struct bitmap_head_def *bitmap; @@ -74,9 +74,6 @@ bitmap_ior_into (bitmap a, const_bitmap return changed; } -/* Verify that VRP simplified an "if" statement. */ -/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp2"} } */ -/* Verify that DCE after VRP2 eliminates a dead conversion - to a (Bool). */ -/* { dg-final { scan-tree-dump "Deleting.*_Bool.*;" "cddce2"} } */ - +/* Verify that FRE simplified an if stmt. */ +/* { dg-final { scan-tree-dump "Replaced a_elt_\[0-9\]+ != 0B with 1" "fre1" } } */ +/* { dg-final { scan-tree-dump "Replaced _\[0-9\]+ & _\[0-9\]+ with _\[0-9\]+" "fre1" } } */ Index: gcc/tree-ssa-sccvn.c =================================================================== --- gcc/tree-ssa-sccvn.c (revision 226802) +++ gcc/tree-ssa-sccvn.c (working copy) @@ -2365,10 +2365,18 @@ vn_nary_op_compute_hash (const vn_nary_o if (TREE_CODE (vno1->op[i]) == SSA_NAME) vno1->op[i] = SSA_VAL (vno1->op[i]); - if (vno1->length == 2 - && commutative_tree_code (vno1->opcode) + if (((vno1->length == 2 + && commutative_tree_code (vno1->opcode)) + || (vno1->length == 3 + && commutative_ternary_tree_code (vno1->opcode))) && tree_swap_operands_p (vno1->op[0], vno1->op[1], false)) std::swap (vno1->op[0], vno1->op[1]); + else if (TREE_CODE_CLASS (vno1->opcode) == tcc_comparison + && tree_swap_operands_p (vno1->op[0], vno1->op[1], false)) + { + std::swap (vno1->op[0], vno1->op[1]); + vno1->opcode = swap_tree_comparison (vno1->opcode); + } hstate.add_int (vno1->opcode); for (i = 0; i < vno1->length; ++i) @@ -4281,13 +4289,105 @@ set_hashtable_value_ids (void) class sccvn_dom_walker : public dom_walker { public: - sccvn_dom_walker () : dom_walker (CDI_DOMINATORS), fail (false) {} + sccvn_dom_walker () + : dom_walker (CDI_DOMINATORS), fail (false), cond_stack (vNULL) {} virtual void before_dom_children (basic_block); + virtual void after_dom_children (basic_block); + + void record_cond (basic_block, + enum tree_code code, tree lhs, tree rhs, bool value); + void record_conds (basic_block, + enum tree_code code, tree lhs, tree rhs, bool value); bool fail; + vec<std::pair <basic_block, std::pair <vn_nary_op_t, vn_nary_op_t> > > + cond_stack; }; +/* Record a temporary condition for the BB and its dominated blocks. */ + +void +sccvn_dom_walker::record_cond (basic_block bb, + enum tree_code code, tree lhs, tree rhs, + bool value) +{ + tree ops[2] = { lhs, rhs }; + vn_nary_op_t old = NULL; + if (vn_nary_op_lookup_pieces (2, code, boolean_type_node, ops, &old)) + current_info->nary->remove_elt_with_hash (old, old->hashcode); + vn_nary_op_t cond + = vn_nary_op_insert_pieces (2, code, boolean_type_node, ops, + value + ? boolean_true_node + : boolean_false_node, 0); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Recording temporarily "); + print_generic_expr (dump_file, ops[0], TDF_SLIM); + fprintf (dump_file, " %s ", get_tree_code_name (code)); + print_generic_expr (dump_file, ops[1], TDF_SLIM); + fprintf (dump_file, " == %s%s\n", + value ? "true" : "false", + old ? " (old entry saved)" : ""); + } + cond_stack.safe_push (std::make_pair (bb, std::make_pair (cond, old))); +} + +/* Record temporary conditions for the BB and its dominated blocks + according to LHS CODE RHS == VALUE and its dominated conditions. */ + +void +sccvn_dom_walker::record_conds (basic_block bb, + enum tree_code code, tree lhs, tree rhs, + bool value) +{ + /* Record the original condition. */ + record_cond (bb, code, lhs, rhs, value); + + if (!value) + return; + + /* Record dominated conditions if the condition is true. Note that + the inversion is already recorded. */ + switch (code) + { + case LT_EXPR: + case GT_EXPR: + record_cond (bb, code == LT_EXPR ? LE_EXPR : GE_EXPR, lhs, rhs, true); + record_cond (bb, NE_EXPR, lhs, rhs, true); + record_cond (bb, EQ_EXPR, lhs, rhs, false); + break; + + case EQ_EXPR: + record_cond (bb, LE_EXPR, lhs, rhs, true); + record_cond (bb, GE_EXPR, lhs, rhs, true); + record_cond (bb, LT_EXPR, lhs, rhs, false); + record_cond (bb, GT_EXPR, lhs, rhs, false); + break; + + default: + break; + } +} + +/* Restore expressions and values derived from conditionals. */ + +void +sccvn_dom_walker::after_dom_children (basic_block bb) +{ + while (!cond_stack.is_empty () + && cond_stack.last ().first == bb) + { + vn_nary_op_t cond = cond_stack.last ().second.first; + vn_nary_op_t old = cond_stack.last ().second.second; + current_info->nary->remove_elt_with_hash (cond, cond->hashcode); + if (old) + vn_nary_op_insert_into (old, current_info->nary, false); + cond_stack.pop (); + } +} + /* Value number all statements in BB. */ void @@ -4320,6 +4420,27 @@ sccvn_dom_walker::before_dom_children (b return; } + /* If we have a single predecessor record the equivalence from a + possible condition on the predecessor edge. */ + if (single_pred_p (bb)) + { + edge e = single_pred_edge (bb); + gimple stmt = last_stmt (e->src); + if (stmt + && gimple_code (stmt) == GIMPLE_COND) + { + enum tree_code code = gimple_cond_code (stmt); + tree lhs = gimple_cond_lhs (stmt); + tree rhs = gimple_cond_rhs (stmt); + record_conds (bb, code, lhs, rhs, + (e->flags & EDGE_TRUE_VALUE) != 0); + code = invert_tree_comparison (code, HONOR_NANS (lhs)); + if (code != ERROR_MARK) + record_conds (bb, code, lhs, rhs, + (e->flags & EDGE_TRUE_VALUE) == 0); + } + } + /* Value-number all defs in the basic-block. */ for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) @@ -4389,6 +4510,16 @@ sccvn_dom_walker::before_dom_children (b rhs = vn_get_expr_for (rhs); val = fold_binary (gimple_cond_code (stmt), boolean_type_node, lhs, rhs); + /* If that didn't simplify to a constant see if we have recorded + temporary expressions from taken edges. */ + if (!val || TREE_CODE (val) != INTEGER_CST) + { + tree ops[2]; + ops[0] = gimple_cond_lhs (stmt); + ops[1] = gimple_cond_rhs (stmt); + val = vn_nary_op_lookup_pieces (2, gimple_cond_code (stmt), + boolean_type_node, ops, NULL); + } break; } case GIMPLE_SWITCH: