This fixes the two remaining dataflow iteration order issues in lcm.c. On the testcase from PR46590 (long function with many loops) we get an improvement from
PRE : 13.85 (42%) usr 0.08 (16%) sys 13.93 (41%) wall 1067 kB ( 1%) ggc TOTAL : 33.35 0.50 33.87 207511 kB to PRE : 6.27 (24%) usr 0.03 ( 8%) sys 6.31 (24%) wall 1067 kB ( 1%) ggc TOTAL : 25.62 0.36 25.97 207511 kB Bootstrapped and tested on x86_64-unknown-linux, applied to trunk. Richard. 2014-01-16 Richard Biener <rguent...@suse.de> PR rtl-optimization/46590 * lcm.c (compute_antinout_edge): Use postorder iteration. (compute_laterin): Use inverted postorder iteration. Index: gcc/lcm.c =================================================================== *** gcc/lcm.c (revision 206659) --- gcc/lcm.c (working copy) *************** compute_antinout_edge (sbitmap *antloc, *** 109,119 **** /* Put every block on the worklist; this is necessary because of the optimistic initialization of ANTIN above. */ ! FOR_EACH_BB_REVERSE_FN (bb, cfun) { *qin++ = bb; bb->aux = bb; } qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; --- 109,123 ---- /* Put every block on the worklist; this is necessary because of the optimistic initialization of ANTIN above. */ ! int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); ! int postorder_num = post_order_compute (postorder, false, false); ! for (int i = 0; i < postorder_num; ++i) { + bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); *qin++ = bb; bb->aux = bb; } + free (postorder); qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; *************** compute_laterin (struct edge_list *edge_ *** 281,291 **** /* Add all the blocks to the worklist. This prevents an early exit from the loop given our optimistic initialization of LATER above. */ ! FOR_EACH_BB_FN (bb, cfun) { *qin++ = bb; bb->aux = bb; } /* Note that we do not use the last allocated element for our queue, as EXIT_BLOCK is never inserted into it. */ --- 285,302 ---- /* Add all the blocks to the worklist. This prevents an early exit from the loop given our optimistic initialization of LATER above. */ ! int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); ! int postorder_num = inverted_post_order_compute (postorder); ! for (int i = 0; i < postorder_num; ++i) { + bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); + if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) + || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) + continue; *qin++ = bb; bb->aux = bb; } + free (postorder); /* Note that we do not use the last allocated element for our queue, as EXIT_BLOCK is never inserted into it. */ *************** compute_laterin (struct edge_list *edge_ *** 307,320 **** bitmap_ones (laterin[bb->index]); FOR_EACH_EDGE (e, ei, bb->preds) bitmap_and (laterin[bb->index], laterin[bb->index], ! later[(size_t)e->aux]); /* Calculate LATER for all outgoing edges. */ FOR_EACH_EDGE (e, ei, bb->succs) if (bitmap_ior_and_compl (later[(size_t) e->aux], ! earliest[(size_t) e->aux], ! laterin[e->src->index], ! antloc[e->src->index]) /* If LATER for an outgoing edge was changed, then we need to add the target of the outgoing edge to the worklist. */ && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0) --- 318,331 ---- bitmap_ones (laterin[bb->index]); FOR_EACH_EDGE (e, ei, bb->preds) bitmap_and (laterin[bb->index], laterin[bb->index], ! later[(size_t)e->aux]); /* Calculate LATER for all outgoing edges. */ FOR_EACH_EDGE (e, ei, bb->succs) if (bitmap_ior_and_compl (later[(size_t) e->aux], ! earliest[(size_t) e->aux], ! laterin[bb->index], ! antloc[bb->index]) /* If LATER for an outgoing edge was changed, then we need to add the target of the outgoing edge to the worklist. */ && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0) *************** compute_laterin (struct edge_list *edge_ *** 333,340 **** bitmap_ones (laterin[last_basic_block_for_fn (cfun)]); FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) bitmap_and (laterin[last_basic_block_for_fn (cfun)], ! laterin[last_basic_block_for_fn (cfun)], ! later[(size_t) e->aux]); clear_aux_for_edges (); free (worklist); --- 344,351 ---- bitmap_ones (laterin[last_basic_block_for_fn (cfun)]); FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) bitmap_and (laterin[last_basic_block_for_fn (cfun)], ! laterin[last_basic_block_for_fn (cfun)], ! later[(size_t) e->aux]); clear_aux_for_edges (); free (worklist);