The following adds the missing capability to sink loads to tree-ssa-sink.c. This enables sinking of loads and dependent expressions into code paths that uses them (thus performing partial dead code elimination on loads).
The algorithm is simple (similar to that sinking stores) to be light-weight on compile-time thus it may miss some opportunities but it fires quite a bit on GCC itself. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2014-06-06 Richard Biener <rguent...@suse.de> PR tree-optimization/59299 * tree-ssa-sink.c (all_immediate_uses_same_place): Work on a def operand. (nearest_common_dominator_of_uses): Likewise. (statement_sink_location): Adjust. Support sinking loads. * gcc.dg/tree-ssa/ssa-sink-10.c: New testcase. Index: gcc/tree-ssa-sink.c =================================================================== *** gcc/tree-ssa-sink.c.orig 2014-06-06 10:26:37.533999642 +0200 --- gcc/tree-ssa-sink.c 2014-06-06 15:20:50.886784230 +0200 *************** find_bb_for_arg (gimple phi, tree def) *** 110,135 **** used in, so that you only have one place you can sink it to. */ static bool ! all_immediate_uses_same_place (gimple stmt) { ! gimple firstuse = NULL; ! ssa_op_iter op_iter; imm_use_iterator imm_iter; use_operand_p use_p; - tree var; ! FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_ALL_DEFS) { ! FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var) ! { ! if (is_gimple_debug (USE_STMT (use_p))) ! continue; ! if (firstuse == NULL) ! firstuse = USE_STMT (use_p); ! else ! if (firstuse != USE_STMT (use_p)) ! return false; ! } } return true; --- 110,131 ---- used in, so that you only have one place you can sink it to. */ static bool ! all_immediate_uses_same_place (def_operand_p def_p) { ! tree var = DEF_FROM_PTR (def_p); imm_use_iterator imm_iter; use_operand_p use_p; ! gimple firstuse = NULL; ! FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var) { ! if (is_gimple_debug (USE_STMT (use_p))) ! continue; ! if (firstuse == NULL) ! firstuse = USE_STMT (use_p); ! else ! if (firstuse != USE_STMT (use_p)) ! return false; } return true; *************** all_immediate_uses_same_place (gimple st *** 138,186 **** /* Find the nearest common dominator of all of the immediate uses in IMM. */ static basic_block ! nearest_common_dominator_of_uses (gimple stmt, bool *debug_stmts) { bitmap blocks = BITMAP_ALLOC (NULL); basic_block commondom; unsigned int j; bitmap_iterator bi; - ssa_op_iter op_iter; imm_use_iterator imm_iter; use_operand_p use_p; - tree var; ! bitmap_clear (blocks); ! FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_ALL_DEFS) { ! FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var) ! { ! gimple usestmt = USE_STMT (use_p); ! basic_block useblock; ! if (gimple_code (usestmt) == GIMPLE_PHI) ! { ! int idx = PHI_ARG_INDEX_FROM_USE (use_p); ! useblock = gimple_phi_arg_edge (usestmt, idx)->src; ! } ! else if (is_gimple_debug (usestmt)) ! { ! *debug_stmts = true; ! continue; ! } ! else ! { ! useblock = gimple_bb (usestmt); ! } ! /* Short circuit. Nothing dominates the entry block. */ ! if (useblock == ENTRY_BLOCK_PTR_FOR_FN (cfun)) ! { ! BITMAP_FREE (blocks); ! return NULL; ! } ! bitmap_set_bit (blocks, useblock->index); } } commondom = BASIC_BLOCK_FOR_FN (cfun, bitmap_first_set_bit (blocks)); EXECUTE_IF_SET_IN_BITMAP (blocks, 0, j, bi) --- 134,177 ---- /* Find the nearest common dominator of all of the immediate uses in IMM. */ static basic_block ! nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts) { + tree var = DEF_FROM_PTR (def_p); bitmap blocks = BITMAP_ALLOC (NULL); basic_block commondom; unsigned int j; bitmap_iterator bi; imm_use_iterator imm_iter; use_operand_p use_p; ! FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var) { ! gimple usestmt = USE_STMT (use_p); ! basic_block useblock; ! if (gimple_code (usestmt) == GIMPLE_PHI) ! { ! int idx = PHI_ARG_INDEX_FROM_USE (use_p); ! useblock = gimple_phi_arg_edge (usestmt, idx)->src; ! } ! else if (is_gimple_debug (usestmt)) ! { ! *debug_stmts = true; ! continue; ! } ! else ! { ! useblock = gimple_bb (usestmt); ! } ! /* Short circuit. Nothing dominates the entry block. */ ! if (useblock == ENTRY_BLOCK_PTR_FOR_FN (cfun)) ! { ! BITMAP_FREE (blocks); ! return NULL; } + bitmap_set_bit (blocks, useblock->index); } commondom = BASIC_BLOCK_FOR_FN (cfun, bitmap_first_set_bit (blocks)); EXECUTE_IF_SET_IN_BITMAP (blocks, 0, j, bi) *************** statement_sink_location (gimple stmt, ba *** 294,301 **** be seen by an external routine that needs it depending on where it gets moved to. - We don't want to sink loads from memory. - We can't sink statements that end basic blocks without splitting the incoming edge for the sink location to place it there. --- 285,290 ---- *************** statement_sink_location (gimple stmt, ba *** 313,319 **** if (stmt_ends_bb_p (stmt) || gimple_has_side_effects (stmt) || gimple_has_volatile_ops (stmt) - || (gimple_vuse (stmt) && !gimple_vdef (stmt)) || (cfun->has_local_explicit_reg_vars && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)) return false; --- 302,307 ---- *************** statement_sink_location (gimple stmt, ba *** 332,338 **** /* If stmt is a store the one and only use needs to be the VOP merging PHI node. */ ! if (gimple_vdef (stmt)) { FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p)) { --- 320,326 ---- /* If stmt is a store the one and only use needs to be the VOP merging PHI node. */ ! if (virtual_operand_p (DEF_FROM_PTR (def_p))) { FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p)) { *************** statement_sink_location (gimple stmt, ba *** 369,383 **** common dominator of all the immediate uses. For PHI nodes, we have to find the nearest common dominator of all of the predecessor blocks, since that is where insertion would have to take place. */ ! else if (!all_immediate_uses_same_place (stmt)) { bool debug_stmts = false; ! basic_block commondom = nearest_common_dominator_of_uses (stmt, &debug_stmts); if (commondom == frombb) return false; /* Our common dominator has to be dominated by frombb in order to be a trivially safe place to put this statement, since it has multiple uses. */ --- 357,406 ---- common dominator of all the immediate uses. For PHI nodes, we have to find the nearest common dominator of all of the predecessor blocks, since that is where insertion would have to take place. */ ! else if (gimple_vuse (stmt) ! || !all_immediate_uses_same_place (def_p)) { bool debug_stmts = false; ! basic_block commondom = nearest_common_dominator_of_uses (def_p, &debug_stmts); if (commondom == frombb) return false; + /* If this is a load then do not sink past any stores. + ??? This is overly simple but cheap. We basically look + for an existing load with the same VUSE in the path to one + of the sink candidate blocks and we adjust commondom to the + nearest to commondom. */ + if (gimple_vuse (stmt)) + { + imm_use_iterator imm_iter; + use_operand_p use_p; + basic_block found = NULL; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vuse (stmt)) + { + gimple use_stmt = USE_STMT (use_p); + basic_block bb = gimple_bb (use_stmt); + /* For PHI nodes the block we know sth about + is the incoming block with the use. */ + if (gimple_code (use_stmt) == GIMPLE_PHI) + bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src; + /* Any dominator of commondom would be ok with + adjusting commondom to that block. */ + bb = nearest_common_dominator (CDI_DOMINATORS, bb, commondom); + if (!found) + found = bb; + else if (dominated_by_p (CDI_DOMINATORS, bb, found)) + found = bb; + /* If we can't improve, stop. */ + if (found == commondom) + break; + } + commondom = found; + if (commondom == frombb) + return false; + } + /* Our common dominator has to be dominated by frombb in order to be a trivially safe place to put this statement, since it has multiple uses. */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c 2014-06-06 10:31:36.715979043 +0200 *************** *** 0 **** --- 1,20 ---- + /* { dg-do compile } */ + /* { dg-options "-O2 -fdump-tree-sink-details" } */ + + int x[1024], y[1024], z[1024], w[1024]; + void foo (void) + { + int i; + for (i = 1; i < 1024; ++i) + { + int a = x[i]; + int b = y[i]; + int c = x[i-1]; + int d = y[i-1]; + if (w[i]) + z[i] = (a + b) + (c + d); + } + } + + /* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink" } } */ + /* { dg-final { cleanup-tree-dump "sink" } } */