On Wed, 12 Aug 2015, Richard Biener wrote: > > This brings FRE/PRE up to the same level as DOM in being able to > remove redundant conditionals. It does so by inserting temporary > conditional expressions proved to be true on single predecessor > edges. > > I've had to do a lot of testcase adjustments, thus the patch is > now re-bootstrapping / testing on x86_64-unknown-linux-gnu.
I've applied with a slight change, trimming down the number of equivalences recorded (basically only record anything off conditions not already optimized to go either way). Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-08-12 Richard Biener <rguent...@suse.de> * tree-ssa-sccvn.c (vn_nary_op_compute_hash): Also canonicalize comparison operand order and commutative ternary op operand order. (sccvn_dom_walker::cond_stack): New state to track temporary expressions. (sccvn_dom_walker::after_dom_children): Remove tempoary expressions no longer valid. (sccvn_dom_walker::record_cond): Add a single temporary conditional expression. (sccvn_dom_walker::record_conds): Add a temporary conditional expressions and all related expressions also true/false. (sccvn_dom_walker::before_dom_children): Record temporary expressions based on the controlling condition of a single predecessor. When trying to simplify a conditional statement lookup expressions we might have inserted earlier. * testsuite/gcc.dg/tree-ssa/ssa-fre-47.c: New testcase. * testsuite/gcc.dg/tree-ssa/ssa-fre-48.c: Likewise. * testsuite/gcc.dg/tree-ssa/ssa-fre-49.c: Likewise. * testsuite/g++.dg/tree-ssa/pr61034.C: Adjust. * testsuite/gcc.dg/fold-compare-2.c: Likewise. * testsuite/gcc.dg/pr50763.c: Likewise. * testsuite/gcc.dg/predict-3.c: Likewise. * testsuite/gcc.dg/tree-ssa/20030709-2.c: Likewise. * testsuite/gcc.dg/tree-ssa/pr19831-3.c: Likewise. * testsuite/gcc.dg/tree-ssa/pr20657.c: Likewise. * testsuite/gcc.dg/tree-ssa/pr21001.c: Likewise. * testsuite/gcc.dg/tree-ssa/pr37508.c: Likewise. * testsuite/gcc.dg/tree-ssa/vrp04.c: Likewise. * testsuite/gcc.dg/tree-ssa/vrp07.c: Likewise. * testsuite/gcc.dg/tree-ssa/vrp09.c: Likewise. * testsuite/gcc.dg/tree-ssa/vrp16.c: Likewise. * testsuite/gcc.dg/tree-ssa/vrp20.c: Likewise. * testsuite/gcc.dg/tree-ssa/vrp25.c: Likewise. * testsuite/gcc.dg/tree-ssa/vrp87.c: Likewise. Index: gcc/testsuite/g++.dg/tree-ssa/pr61034.C =================================================================== --- gcc/testsuite/g++.dg/tree-ssa/pr61034.C (revision 226802) +++ gcc/testsuite/g++.dg/tree-ssa/pr61034.C (working copy) @@ -43,5 +43,5 @@ bool f(I a, I b, I c, I d) { // This works only if everything is inlined into 'f'. // { dg-final { scan-tree-dump-times ";; Function" 1 "fre2" } } -// { dg-final { scan-tree-dump-times "free" 18 "fre2" } } +// { dg-final { scan-tree-dump-times "free" 10 "fre2" } } // { dg-final { scan-tree-dump-times "unreachable" 11 "fre2" } } Index: gcc/testsuite/gcc.dg/fold-compare-2.c =================================================================== --- gcc/testsuite/gcc.dg/fold-compare-2.c (revision 226802) +++ gcc/testsuite/gcc.dg/fold-compare-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-tail-merge -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ extern void abort (void); @@ -15,5 +15,5 @@ main(void) return 0; } -/* { dg-final { scan-tree-dump-times "Removing basic block" 2 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "Removing basic block" 2 "fre1" } } */ Index: gcc/testsuite/gcc.dg/pr50763.c =================================================================== --- gcc/testsuite/gcc.dg/pr50763.c (revision 226802) +++ gcc/testsuite/gcc.dg/pr50763.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-tail-merge -fno-tree-dominator-opts -fdump-tree-pre" } */ +/* { dg-options "-O2 -ftree-tail-merge -fno-tree-dominator-opts" } */ int bar (int i); @@ -11,5 +11,3 @@ foo (int c, int d) d = 33; while (c == d); } - -/* { dg-final { scan-tree-dump-times "== 33" 2 "pre"} } */ Index: gcc/testsuite/gcc.dg/predict-3.c =================================================================== --- gcc/testsuite/gcc.dg/predict-3.c (revision 226802) +++ gcc/testsuite/gcc.dg/predict-3.c (working copy) @@ -12,6 +12,10 @@ void foo (int bound) { if (i < bound - 2) global += bar (i); + /* The following test is redundant with the loop bound check in the + for stmt and thus eliminated by FRE which makes the controlled + stmt always executed and thus equivalent to 100%. Thus the + heuristic only applies three times. */ if (i <= bound) global += bar (i); if (i + 1 < bound) @@ -21,4 +25,4 @@ void foo (int bound) } } -/* { dg-final { scan-tree-dump-times "loop iv compare heuristics: 100.0%" 4 "profile_estimate"} } */ +/* { dg-final { scan-tree-dump-times "loop iv compare heuristics: 100.0%" 3 "profile_estimate"} } */ Index: gcc/testsuite/gcc.dg/tree-ssa/20030709-2.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/20030709-2.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/20030709-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cddce2" } */ +/* { dg-options "-O -fdump-tree-dce2" } */ struct rtx_def; typedef struct rtx_def *rtx; @@ -42,13 +42,13 @@ get_alias_set (t) /* There should be precisely one load of ->decl.rtl. If there is more than, then the dominator optimizations failed. */ -/* { dg-final { scan-tree-dump-times "->decl\\.rtl" 1 "cddce2"} } */ +/* { dg-final { scan-tree-dump-times "->decl\\.rtl" 1 "dce2"} } */ /* There should be no loads of .rtmem since the complex return statement is just "return 0". */ -/* { dg-final { scan-tree-dump-times ".rtmem" 0 "cddce2"} } */ +/* { dg-final { scan-tree-dump-times ".rtmem" 0 "dce2"} } */ /* There should be one IF statement (the complex return statement should collapse down to a simple return 0 without any conditionals). */ -/* { dg-final { scan-tree-dump-times "if " 1 "cddce2"} } */ +/* { dg-final { scan-tree-dump-times "if " 1 "dce2"} } */ Index: gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ void test2(void) { Index: gcc/testsuite/gcc.dg/tree-ssa/pr20657.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr20657.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr20657.c (working copy) @@ -3,7 +3,7 @@ statement, which was needed to eliminate the second "if" statement. */ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-dominator-opts -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-fre -fdump-tree-vrp1-details" } */ int foo (int a) Index: gcc/testsuite/gcc.dg/tree-ssa/pr21001.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr21001.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr21001.c (working copy) @@ -5,7 +5,7 @@ range information out of the conditional. */ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-dominator-opts -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-fre -fdump-tree-vrp1-details" } */ int foo (int a) Index: gcc/testsuite/gcc.dg/tree-ssa/pr37508.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr37508.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/pr37508.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1" } */ struct foo1 { int i:1; @@ -10,9 +10,10 @@ struct foo2 { int test1 (struct foo1 *x) { - if (x->i == 0) + int i = x->i; + if (i == 0) return 1; - else if (x->i == -1) + else if (i == -1) return 1; return 0; } @@ -37,9 +38,10 @@ int test3 (struct foo1 *x) int test4 (struct foo2 *x) { - if (x->i == 0) + unsigned int i = x->i; + if (i == 0) return 1; - else if (x->i == 1) + else if (i == 1) return 1; return 0; } Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-47.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-47.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-47.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +int foo (int i) +{ + if (i) + { + if (i) + return 0; + else + return 1; + } + return 0; +} + +/* { dg-final { scan-tree-dump "return 0;" "fre1" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-48.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-48.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-48.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1-details" } */ + +int foo (int i) +{ + if (i) + { + if (i) + return 1; + else + return 0; + } + return 0; +} + +/* { dg-final { scan-tree-dump "Removing unexecutable edge" "fre1" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-49.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-49.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-49.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +int foo (int i, int j) +{ + if (i < j) + { + if (i <= j) + return j > i; + else + return 0; + } + return 1; +} + +/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/vrp04.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp04.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp04.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1" } */ int foo (int a, int b) Index: gcc/testsuite/gcc.dg/tree-ssa/vrp07.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp07.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp07.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1-details -fdelete-null-pointer-checks" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1-details -fdelete-null-pointer-checks" } */ int foo (int i, int *p) Index: gcc/testsuite/gcc.dg/tree-ssa/vrp09.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp09.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp09.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1 -std=gnu89" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1 -std=gnu89" } */ foo (int *p) { Index: gcc/testsuite/gcc.dg/tree-ssa/vrp16.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp16.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp16.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1-details" } */ extern void abort (void) __attribute__ ((__noreturn__)); @@ -12,9 +12,10 @@ struct rtx_def int nonlocal_mentioned_p (rtx x) { - if (x->code == 6 || x->code == 7) - if (x->code == 7) - if (x->code != 7) + int code = x->code; + if (code == 6 || code == 7) + if (code == 7) + if (code != 7) abort (); } Index: gcc/testsuite/gcc.dg/tree-ssa/vrp20.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp20.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp20.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fwrapv -O1 -ftree-vrp -fdump-tree-vrp1" } */ +/* { dg-options "-fwrapv -O1 -fno-tree-fre -ftree-vrp -fdump-tree-vrp1" } */ extern void abort (); extern void exit (int); Index: gcc/testsuite/gcc.dg/tree-ssa/vrp25.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp25.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp25.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ +/* { dg-options "-O2 -fno-tree-fre -fdump-tree-vrp1-details" } */ extern void abort (); extern void arf (); Index: gcc/testsuite/gcc.dg/tree-ssa/vrp87.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp87.c (revision 226802) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp87.c (working copy) @@ -1,8 +1,8 @@ /* Setting LOGICAL_OP_NON_SHORT_CIRCUIT to 0 leads to two conditional jumps - when evaluating an && condition. VRP is not able to optimize this. */ + when evaluating an && condition. */ /* { dg-do compile { target { ! { logical_op_short_circuit || { m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* xtensa*-*-* hppa*-*-* } } } } } */ -/* { dg-options "-O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details" } */ +/* { dg-options "-O2 -fdump-tree-fre1-details" } */ struct bitmap_head_def; typedef struct bitmap_head_def *bitmap; @@ -74,9 +74,6 @@ bitmap_ior_into (bitmap a, const_bitmap return changed; } -/* Verify that VRP simplified an "if" statement. */ -/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp2"} } */ -/* Verify that DCE after VRP2 eliminates a dead conversion - to a (Bool). */ -/* { dg-final { scan-tree-dump "Deleting.*_Bool.*;" "cddce2"} } */ - +/* Verify that FRE simplified an if stmt. */ +/* { dg-final { scan-tree-dump "Replaced a_elt_\[0-9\]+ != 0B with 1" "fre1" } } */ +/* { dg-final { scan-tree-dump "Replaced _\[0-9\]+ & _\[0-9\]+ with _\[0-9\]+" "fre1" } } */ Index: gcc/tree-ssa-sccvn.c =================================================================== --- gcc/tree-ssa-sccvn.c (revision 226802) +++ gcc/tree-ssa-sccvn.c (working copy) @@ -2365,10 +2365,18 @@ vn_nary_op_compute_hash (const vn_nary_o if (TREE_CODE (vno1->op[i]) == SSA_NAME) vno1->op[i] = SSA_VAL (vno1->op[i]); - if (vno1->length == 2 - && commutative_tree_code (vno1->opcode) + if (((vno1->length == 2 + && commutative_tree_code (vno1->opcode)) + || (vno1->length == 3 + && commutative_ternary_tree_code (vno1->opcode))) && tree_swap_operands_p (vno1->op[0], vno1->op[1], false)) std::swap (vno1->op[0], vno1->op[1]); + else if (TREE_CODE_CLASS (vno1->opcode) == tcc_comparison + && tree_swap_operands_p (vno1->op[0], vno1->op[1], false)) + { + std::swap (vno1->op[0], vno1->op[1]); + vno1->opcode = swap_tree_comparison (vno1->opcode); + } hstate.add_int (vno1->opcode); for (i = 0; i < vno1->length; ++i) @@ -4281,13 +4289,105 @@ set_hashtable_value_ids (void) class sccvn_dom_walker : public dom_walker { public: - sccvn_dom_walker () : dom_walker (CDI_DOMINATORS), fail (false) {} + sccvn_dom_walker () + : dom_walker (CDI_DOMINATORS), fail (false), cond_stack (vNULL) {} virtual void before_dom_children (basic_block); + virtual void after_dom_children (basic_block); + + void record_cond (basic_block, + enum tree_code code, tree lhs, tree rhs, bool value); + void record_conds (basic_block, + enum tree_code code, tree lhs, tree rhs, bool value); bool fail; + vec<std::pair <basic_block, std::pair <vn_nary_op_t, vn_nary_op_t> > > + cond_stack; }; +/* Record a temporary condition for the BB and its dominated blocks. */ + +void +sccvn_dom_walker::record_cond (basic_block bb, + enum tree_code code, tree lhs, tree rhs, + bool value) +{ + tree ops[2] = { lhs, rhs }; + vn_nary_op_t old = NULL; + if (vn_nary_op_lookup_pieces (2, code, boolean_type_node, ops, &old)) + current_info->nary->remove_elt_with_hash (old, old->hashcode); + vn_nary_op_t cond + = vn_nary_op_insert_pieces (2, code, boolean_type_node, ops, + value + ? boolean_true_node + : boolean_false_node, 0); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Recording temporarily "); + print_generic_expr (dump_file, ops[0], TDF_SLIM); + fprintf (dump_file, " %s ", get_tree_code_name (code)); + print_generic_expr (dump_file, ops[1], TDF_SLIM); + fprintf (dump_file, " == %s%s\n", + value ? "true" : "false", + old ? " (old entry saved)" : ""); + } + cond_stack.safe_push (std::make_pair (bb, std::make_pair (cond, old))); +} + +/* Record temporary conditions for the BB and its dominated blocks + according to LHS CODE RHS == VALUE and its dominated conditions. */ + +void +sccvn_dom_walker::record_conds (basic_block bb, + enum tree_code code, tree lhs, tree rhs, + bool value) +{ + /* Record the original condition. */ + record_cond (bb, code, lhs, rhs, value); + + if (!value) + return; + + /* Record dominated conditions if the condition is true. Note that + the inversion is already recorded. */ + switch (code) + { + case LT_EXPR: + case GT_EXPR: + record_cond (bb, code == LT_EXPR ? LE_EXPR : GE_EXPR, lhs, rhs, true); + record_cond (bb, NE_EXPR, lhs, rhs, true); + record_cond (bb, EQ_EXPR, lhs, rhs, false); + break; + + case EQ_EXPR: + record_cond (bb, LE_EXPR, lhs, rhs, true); + record_cond (bb, GE_EXPR, lhs, rhs, true); + record_cond (bb, LT_EXPR, lhs, rhs, false); + record_cond (bb, GT_EXPR, lhs, rhs, false); + break; + + default: + break; + } +} + +/* Restore expressions and values derived from conditionals. */ + +void +sccvn_dom_walker::after_dom_children (basic_block bb) +{ + while (!cond_stack.is_empty () + && cond_stack.last ().first == bb) + { + vn_nary_op_t cond = cond_stack.last ().second.first; + vn_nary_op_t old = cond_stack.last ().second.second; + current_info->nary->remove_elt_with_hash (cond, cond->hashcode); + if (old) + vn_nary_op_insert_into (old, current_info->nary, false); + cond_stack.pop (); + } +} + /* Value number all statements in BB. */ void @@ -4320,6 +4420,39 @@ sccvn_dom_walker::before_dom_children (b return; } + /* If we have a single predecessor record the equivalence from a + possible condition on the predecessor edge. */ + if (single_pred_p (bb)) + { + edge e = single_pred_edge (bb); + /* Check if there are multiple executable successor edges in + the source block. Otherwise there is no additional info + to be recorded. */ + edge e2; + FOR_EACH_EDGE (e2, ei, e->src->succs) + if (e2 != e + && e2->flags & EDGE_EXECUTABLE) + break; + if (e2 && (e2->flags & EDGE_EXECUTABLE)) + { + + gimple stmt = last_stmt (e->src); + if (stmt + && gimple_code (stmt) == GIMPLE_COND) + { + enum tree_code code = gimple_cond_code (stmt); + tree lhs = gimple_cond_lhs (stmt); + tree rhs = gimple_cond_rhs (stmt); + record_conds (bb, code, lhs, rhs, + (e->flags & EDGE_TRUE_VALUE) != 0); + code = invert_tree_comparison (code, HONOR_NANS (lhs)); + if (code != ERROR_MARK) + record_conds (bb, code, lhs, rhs, + (e->flags & EDGE_TRUE_VALUE) == 0); + } + } + } + /* Value-number all defs in the basic-block. */ for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) @@ -4389,6 +4522,16 @@ sccvn_dom_walker::before_dom_children (b rhs = vn_get_expr_for (rhs); val = fold_binary (gimple_cond_code (stmt), boolean_type_node, lhs, rhs); + /* If that didn't simplify to a constant see if we have recorded + temporary expressions from taken edges. */ + if (!val || TREE_CODE (val) != INTEGER_CST) + { + tree ops[2]; + ops[0] = gimple_cond_lhs (stmt); + ops[1] = gimple_cond_rhs (stmt); + val = vn_nary_op_lookup_pieces (2, gimple_cond_code (stmt), + boolean_type_node, ops, NULL); + } break; } case GIMPLE_SWITCH: