Hi, This patch handles duplicating of the last iteration correctly. The current code always duplicates the complete "static" iteration from the entry to the latch, and then sets the number of iterations according to the pattern of the loop (according to whether the cond before the body, or the body before the cond).
The correct way to go about is to not assume anything about the control flow of the loop, and duplicate the last iteration only from entry to the block consisting of the cond, that is the real last dynamic iteration that would be executed. The number of iterations then needs no special care for each loop pattern. This was actually Zdenek's original intent by duplicating the last iteration. This code allows us to remove the do_while cond, and gets PR46886 resolved (instead of the solution suggested in http://gcc.gnu.org/ml/gcc-patches/2012-03/msg01642.html, which handled the number of iterations according to the specific shape of the loop) Bootstrap and testsuite pass successfully. Testsuite with -ftree-parallelize-loops=4 shows only one regression which is uninit-17.c test for warnings, which seems unrelated to how the loop gets parallelized. I also ran spec-2006, and it showed no regressions. 2012-04-20 Razya Ladelsky <ra...@il.ibm.com> PR tree-optimization/46886 * tree-parloops.c (transform_to_exit_first_loop): Remove setting of number of iterations according to the loop pattern. Duplicate from entry to exit->src instead of loop->latch. (pallelize_loops): Remove the condition preventing do-while loops. * tree-cfg.c (bool bb_in_region_p): New. (gimple_duplicate_sese_tail): Adjust duplication of the the subloops. Adjust redirection of the duplicated iteration. O.K to commit? Thanks, Razya
Index: tree-parloops.c =================================================================== --- tree-parloops.c (revision 186493) +++ tree-parloops.c (working copy) @@ -1481,8 +1481,6 @@ transform_to_exit_first_loop (struct loop *loop, h gimple phi, nphi, cond_stmt, stmt, cond_nit; gimple_stmt_iterator gsi; tree nit_1; - edge exit_1; - tree new_rhs; split_block_after_labels (loop->header); orig_header = single_succ (loop->header); @@ -1512,41 +1510,10 @@ transform_to_exit_first_loop (struct loop *loop, h } } - /* Setting the condition towards peeling the last iteration: - If the block consisting of the exit condition has the latch as - successor, then the body of the loop is executed before - the exit condition is tested. In such case, moving the - condition to the entry, causes that the loop will iterate - one less iteration (which is the wanted outcome, since we - peel out the last iteration). If the body is executed after - the condition, moving the condition to the entry requires - decrementing one iteration. */ - exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit); - if (exit_1->dest == loop->latch) - new_rhs = gimple_cond_rhs (cond_stmt); - else - { - new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)), - gimple_cond_rhs (cond_stmt), - build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1)); - if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME) - { - basic_block preheader; - gimple_stmt_iterator gsi1; - - preheader = loop_preheader_edge(loop)->src; - gsi1 = gsi_after_labels (preheader); - new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true, - NULL_TREE,false,GSI_CONTINUE_LINKING); - } - } - gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs)); - gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt))); - bbs = get_loop_body_in_dom_order (loop); - for (n = 0; bbs[n] != loop->latch; n++) - continue; + for (n = 0; bbs[n] != exit->src; n++) + continue; nbbs = XNEWVEC (basic_block, n); ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit, bbs + 1, n, nbbs); @@ -1599,6 +1566,7 @@ transform_to_exit_first_loop (struct loop *loop, h /* Initialize the control variable to number of iterations according to the rhs of the exit condition. */ gsi = gsi_after_labels (ex_bb); + exit = single_dom_exit (loop); cond_nit = last_stmt (exit->src); nit_1 = gimple_cond_rhs (cond_nit); nit_1 = force_gimple_operand_gsi (&gsi, @@ -1861,8 +1829,8 @@ gen_parallel_loop (struct loop *loop, htab_t reduc /* Ensure that the exit condition is the first statement in the loop. */ transform_to_exit_first_loop (loop, reduction_list, nit); - /* Generate initializations for reductions. */ + if (htab_elements (reduction_list) > 0) htab_traverse (reduction_list, initialize_reductions, loop); @@ -2187,10 +2155,9 @@ parallelize_loops (void) || loop_has_blocks_with_irreducible_flag (loop) || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP) /* FIXME: the check for vector phi nodes could be removed. */ - || loop_has_vector_phi_nodes (loop) + || loop_has_vector_phi_nodes (loop)) /* FIXME: transform_to_exit_first_loop does not handle not header-copied loops correctly - see PR46886. */ - || !do_while_loop_p (loop)) continue; estimated = estimated_stmt_executions_int (loop); Index: tree-cfg.c =================================================================== --- tree-cfg.c (revision 186493) +++ tree-cfg.c (working copy) @@ -5595,6 +5595,18 @@ gimple_duplicate_sese_region (edge entry, edge exi return true; } +bool bb_in_region_p (basic_block bb, basic_block* bbs, unsigned n_region) +{ + unsigned int n; + + for (n = 0; n < n_region; n++) + { + if (bb == bbs[n]) + return true; + } + return false; +} + /* Duplicates REGION consisting of N_REGION blocks. The new blocks are stored to REGION_COPY in the same order in that they appear in REGION, if REGION_COPY is not NULL. ENTRY is the entry to @@ -5645,6 +5657,7 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_U gimple_stmt_iterator psi; gimple phi; tree def; + struct loop *target, *aloop, *cloop; gcc_assert (EDGE_COUNT (exit->src->succs) == 2); exits[0] = exit; @@ -5655,8 +5668,17 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_U initialize_original_copy_tables (); set_loop_copy (orig_loop, loop); - duplicate_subloops (orig_loop, loop); + target= loop; + for (aloop = orig_loop->inner; aloop; aloop = aloop->next) + { + if (bb_in_region_p (aloop->header, region, n_region)) + { + cloop = duplicate_loop (aloop, target); + duplicate_subloops (aloop, cloop); + } + } + if (!region_copy) { region_copy = XNEWVEC (basic_block, n_region); @@ -5758,7 +5780,7 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_U add_phi_arg (phi, def, e, gimple_phi_arg_location_from_edge (phi, e)); } } - e = redirect_edge_and_branch (nexits[0], nexits[1]->dest); + e = redirect_edge_and_branch (nexits[1], nexits[0]->dest); PENDING_STMT (e) = NULL; /* Anything that is outside of the region, but was dominated by something =