Hi, I previously noticed from test cases that GCC currently does not sink loads from memory in tree-ssa-sink.c. After some discussion I worked out a patch to support this in GCC. Please refer to http://gcc.gnu.org/ml/gcc/2012-04/msg00404.html for background information.
I think it is a trivial optimization, but it might be wanted in GCC since fewer instructions are executed with it. One potential issue: should I disable it when optimizing for size, in case the sinking hurts code size? I bootstrapped on x86 and tested the patch on x86/arm with no new failures introduced. Is it OK for trunk? Comments are welcome. Thanks. gcc/ChangeLog: 2012-05-11 Bin Cheng <bin.ch...@arm.com> * tree-ssa-sink.c (pred_blocks, pred_visited): New static variables. (init_sink_load): New function; initializes pred_blocks and pred_visited. (free_sink_load): New function; frees pred_blocks and pred_visited. (sink_load_p): New function; checks whether a load operation should be sunk. (execute_sink_code): Call init_sink_load and free_sink_load. (statement_sink_location): Sink loads from memory if possible. gcc/testsuite/ChangeLog: 2012-05-11 Bin Cheng <bin.ch...@arm.com> * gcc.dg/tree-ssa/ssa-sink-10.c: New test. * gcc.dg/tree-ssa/ssa-sink-11.c: New test.
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c (revision 0) @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-sink-stats" } */ +extern int b; +int +foo (int a, int c) +{ + int x = b; + if (c == 1) + return x; + else + return a; +} +/* We should sink load from b into the branch that returns x. */ +/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */ +/* { dg-final { cleanup-tree-dump "sink" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-11.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-11.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-11.c (revision 0) @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-sink-stats" } */ +extern int b; +void bar(); +int +foo (int a, int c) +{ + int x = b; + bar(); + if (c == 1) + return x; + else + return a; +} +/* We should not sink load of b into the branch that returns x, + because it might be clobbered by the call to bar. */ +/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink" } } */ +/* { dg-final { cleanup-tree-dump "sink" } } */ Index: gcc/tree-ssa-sink.c =================================================================== --- gcc/tree-ssa-sink.c (revision 186736) +++ gcc/tree-ssa-sink.c (working copy) @@ -77,6 +77,115 @@ } sink_stats; +/* Stores reversely breadth-first searched block pointer, starting at + the potential destination block of sinking from load. */ +static basic_block *pred_blocks; +/* Bitmap records the visited basic blocks in breadth-first search. 
*/ +static bitmap pred_visited; + +static void +init_sink_load (void) +{ + pred_blocks = XCNEWVEC (basic_block, last_basic_block); + pred_visited = BITMAP_ALLOC (NULL); +} + +static void +free_sink_load (void) +{ + free (pred_blocks); + BITMAP_FREE (pred_visited); +} + +/* Given a gimple STMT in basic block FROM, which is a load operation, + check whether it should be sunk to TOGSI in basic block TO. + Return TRUE if it should be sunk, otherwise return FALSE. + + Loads should not be sunk across stmts that might clobber the reference; + for simplicity, we just stop checking at any store. */ + +static bool +sink_load_p (gimple stmt, basic_block from, basic_block to, + gimple_stmt_iterator *togsi) +{ + int i, vc; + basic_block bb; + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + + /* Stmt should be a load. */ + gcc_assert (gimple_vuse (stmt) && !gimple_vdef (stmt)); + + /* Sinking conflicts with the if-conversion pass, which speculates + loads. So do not sink a possibly trapping stmt, which cannot be + speculated by the if-conversion pass afterward. */ + if (gimple_assign_rhs_could_trap_p (stmt)) + return false; + + /* Do not sink to a block that post-dominates FROM, unless TO is in an outer loop. */ + if (dominated_by_p (CDI_POST_DOMINATORS, from, to) + && (from->loop_depth <= to->loop_depth)) + return false; + + /* If the latch block is empty, don't make it non-empty by sinking + load into it. */ + if (to == from->loop_father->latch && empty_block_p (to)) + return false; + + /* Don't sink stmt if reference might be clobbered by successor stmts. */ + for (gsi_next(&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) + if (gimple_vdef (gsi_stmt (gsi))) + return false; + + /* Don't sink stmt if reference might be clobbered by predecessors of + TOGSI. 
*/ + gsi = *togsi; + if (!gsi_end_p (gsi)) + gsi_prev (&gsi); + + for (; !gsi_end_p (gsi); gsi_prev (&gsi)) + if (gimple_vdef (gsi_stmt (gsi))) + return false; + + /* Don't sink stmt if reference might be clobbered by basic blocks in any + path along FROM to TO. We do this by a reverse breadth-first search of + blocks starting at TO, and stop checking at any store. */ + + i = 0, vc = 1; + bb = to; + bitmap_clear (pred_visited); + + do + { + edge e; + edge_iterator ei; + + if (bitmap_set_bit (pred_visited, bb->index)) + pred_blocks[i++] = bb; + + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block src = e->src; + if (src != from && src != to) + { + gcc_assert (dominated_by_p (CDI_DOMINATORS, src, from)); + if (bitmap_set_bit (pred_visited, src->index)) + pred_blocks[i++] = src; + } + } + + bb = pred_blocks[vc++]; + + /* Check whether there is a store in basic block bb. */ + if (i >= vc) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (gimple_vdef (gsi_stmt (gsi))) + return false; + + } while (i >= vc); + + return true; +} + /* Given a PHI, and one of its arguments (DEF), find the edge for that argument and return it. If the argument occurs twice in the PHI node, we return NULL. */ @@ -363,7 +472,8 @@ be seen by an external routine that needs it depending on where it gets moved to. - We don't want to sink loads from memory. + We only want to sink loads from memory to a not post-dominated basic + block, or to a basic block in an outer loop. We can't sink statements that end basic blocks without splitting the incoming edge for the sink location to place it there. 
@@ -382,7 +492,6 @@ if (stmt_ends_bb_p (stmt) || gimple_has_side_effects (stmt) || gimple_has_volatile_ops (stmt) - || (gimple_vuse (stmt) && !gimple_vdef (stmt)) || (cfun->has_local_explicit_reg_vars && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)) return false; @@ -448,10 +557,15 @@ commondom = select_best_block (frombb, commondom, stmt); if (commondom == frombb) - return false; + return false; *togsi = gsi_after_labels (commondom); + /* If stmt is a load operation, check whether it should be sunk. */ + if ((gimple_vuse (stmt) && !gimple_vdef (stmt)) + && !sink_load_p (stmt, frombb, commondom, togsi)) + return false; + return true; } else @@ -474,6 +588,11 @@ *togsi = gsi_for_stmt (use); + /* If stmt is a load operation, check whether it should be sunk. */ + if ((gimple_vuse (stmt) && !gimple_vdef (stmt)) + && !sink_load_p (stmt, frombb, sinkbb, togsi)) + return false; + return true; } } @@ -483,7 +602,7 @@ /* This can happen if there are multiple uses in a PHI. */ if (!sinkbb) return false; - + sinkbb = select_best_block (frombb, sinkbb, stmt); if (!sinkbb || sinkbb == frombb) return false; @@ -496,6 +615,11 @@ *togsi = gsi_after_labels (sinkbb); + /* If stmt is a load operation, check whether it should be sunk. */ + if ((gimple_vuse (stmt) && !gimple_vdef (stmt)) + && !sink_load_p (stmt, frombb, sinkbb, togsi)) + return false; + return true; } @@ -633,7 +757,9 @@ memset (&sink_stats, 0, sizeof (sink_stats)); calculate_dominance_info (CDI_DOMINATORS); calculate_dominance_info (CDI_POST_DOMINATORS); + init_sink_load (); sink_code_in_bb (EXIT_BLOCK_PTR); + free_sink_load (); statistics_counter_event (cfun, "Sunk statements", sink_stats.sunk); free_dominance_info (CDI_POST_DOMINATORS); remove_fake_exit_edges ();