Hi, this patch updates tree_unroll_loops_completely to update loop closed SSA. WHen unlooping the loop some basic blocks may move out of the other loops and that makes the need to check their use and add PHIs. Fortunately update_loop_close_ssa already support local updates and thus this can be done quite cheaply by recoridng the blocks in fix_bb_placements and passing it along.
I tried the patch with TODO_update_ssa_no_phi but that causes weird bug in 3 fortran testcases because VOPS seems to not be in the loop closed form. We can track this incrementally I suppose. Bootstrapped/regtested x86_64-linux, OK? Honza PR middle-end/54967 * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Take loop_closed_ssa_invalidated parameter; pass it along. (canonicalize_loop_induction_variables): Update loop closed SSA. (tree_unroll_loops_completely): Likewise. * cfgloop.h (unloop): UPdate prototype. * cfgloopmanip.c (fix_bb_placements): Record BBs updated into optional bitmap. (unloop): Update to pass along loop_closed_ssa_invalidated. * gfortran.dg/pr54967.f90: New testcase. Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 192632) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -390,13 +390,16 @@ loop_edge_to_cancel (struct loop *loop) EXIT is the exit of the loop that should be eliminated. IRRED_INVALIDATED is used to bookkeep if information about irreducible regions may become invalid as a result - of the transformation. */ + of the transformation. + LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case + when we need to go into loop closed SSA form. */ static bool try_unroll_loop_completely (struct loop *loop, edge exit, tree niter, enum unroll_level ul, - bool *irred_invalidated) + bool *irred_invalidated, + bitmap loop_closed_ssa_invalidated) { unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; gimple cond; @@ -562,7 +565,7 @@ try_unroll_loop_completely (struct loop locus = latch_edge->goto_locus; /* Unloop destroys the latch edge. */ - unloop (loop, irred_invalidated); + unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); /* Create new basic block for the latch edge destination and wire it in. */ @@ -615,7 +618,8 @@ static bool canonicalize_loop_induction_variables (struct loop *loop, bool create_iv, enum unroll_level ul, bool try_eval, - bool *irred_invalidated) + bool *irred_invalidated, + bitmap loop_closed_ssa_invalidated) { edge exit = NULL; tree niter; @@ -663,7 +667,8 @@ canonicalize_loop_induction_variables (s (int)max_loop_iterations_int (loop)); } - if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated)) + if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated, + loop_closed_ssa_invalidated)) return true; if (create_iv @@ -683,13 +688,15 @@ canonicalize_induction_variables (void) struct loop *loop; bool changed = false; bool irred_invalidated = false; + bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); FOR_EACH_LOOP (li, loop, 0) { changed |= canonicalize_loop_induction_variables (loop, true, UL_SINGLE_ITER, true, - &irred_invalidated); + &irred_invalidated, + loop_closed_ssa_invalidated); } gcc_assert (!need_ssa_update_p (cfun)); @@ -701,6 +708,13 @@ canonicalize_induction_variables (void) evaluation could reveal new information. */ scev_reset (); + if (!bitmap_empty_p (loop_closed_ssa_invalidated)) + { + gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA)); + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + } + BITMAP_FREE (loop_closed_ssa_invalidated); + if (changed) return TODO_cleanup_cfg; return 0; @@ -794,11 +808,15 @@ tree_unroll_loops_completely (bool may_i bool changed; enum unroll_level ul; int iteration = 0; + bool irred_invalidated = false; do { - bool irred_invalidated = false; changed = false; + bitmap loop_closed_ssa_invalidated = NULL; + + if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) + loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL); FOR_EACH_LOOP (li, loop, 0) { @@ -812,9 +830,9 @@ tree_unroll_loops_completely (bool may_i else ul = UL_NO_GROWTH; - if (canonicalize_loop_induction_variables (loop, false, ul, - !flag_tree_loop_ivcanon, - &irred_invalidated)) + if (canonicalize_loop_induction_variables + (loop, false, ul, !flag_tree_loop_ivcanon, + &irred_invalidated, loop_closed_ssa_invalidated)) { changed = true; /* If we'll continue unrolling, we need to propagate constants @@ -834,11 +852,14 @@ tree_unroll_loops_completely (bool may_i struct loop **iter; unsigned i; - if (irred_invalidated - && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) - mark_irreducible_loops (); + /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */ - update_ssa (TODO_update_ssa); + if (loop_closed_ssa_invalidated + && !bitmap_empty_p (loop_closed_ssa_invalidated)) + rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated, + TODO_update_ssa); + else + update_ssa (TODO_update_ssa); /* Propagate the constants within the new basic blocks. */ FOR_EACH_VEC_ELT (loop_p, father_stack, i, iter) @@ -861,12 +882,22 @@ tree_unroll_loops_completely (bool may_i /* Clean up the information about numbers of iterations, since complete unrolling might have invalidated it. */ scev_reset (); +#ifdef ENABLE_CHECKING + if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) + verify_loop_closed_ssa (true); +#endif } + if (loop_closed_ssa_invalidated) + BITMAP_FREE (loop_closed_ssa_invalidated); } while (changed && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS)); VEC_free (loop_p, stack, father_stack); + if (irred_invalidated + && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) + mark_irreducible_loops (); + return 0; } Index: cfgloop.h =================================================================== --- cfgloop.h (revision 192632) +++ cfgloop.h (working copy) @@ -321,7 +321,7 @@ extern struct loop *loopify (edge, edge, struct loop * loop_version (struct loop *, void *, basic_block *, unsigned, unsigned, unsigned, bool); extern bool remove_path (edge); -extern void unloop (struct loop *, bool *); +extern void unloop (struct loop *, bool *, bitmap); extern void scale_loop_frequencies (struct loop *, int, int); /* Induction variable analysis. */ Index: cfgloopmanip.c =================================================================== --- cfgloopmanip.c (revision 192632) +++ cfgloopmanip.c (working copy) @@ -36,7 +36,7 @@ static bool rpe_enum_p (const_basic_bloc static int find_path (edge, basic_block **); static void fix_loop_placements (struct loop *, bool *); static bool fix_bb_placement (basic_block); -static void fix_bb_placements (basic_block, bool *); +static void fix_bb_placements (basic_block, bool *, bitmap); /* Checks whether basic block BB is dominated by DATA. */ static bool @@ -159,11 +159,15 @@ fix_loop_placement (struct loop *loop) successors we consider edges coming out of the loops. If the changes may invalidate the information about irreducible regions, - IRRED_INVALIDATED is set to true. */ + IRRED_INVALIDATED is set to true. + + If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with + changed loop_father are collected there. */ static void fix_bb_placements (basic_block from, - bool *irred_invalidated) + bool *irred_invalidated, + bitmap loop_closed_ssa_invalidated) { sbitmap in_queue; basic_block *queue, *qtop, *qbeg, *qend; @@ -218,6 +222,8 @@ fix_bb_placements (basic_block from, /* Ordinary basic block. */ if (!fix_bb_placement (from)) continue; + if (loop_closed_ssa_invalidated) + bitmap_set_bit (loop_closed_ssa_invalidated, from->index); target_loop = from->loop_father; } @@ -312,7 +318,7 @@ remove_path (edge e) { f = loop_outer (l); if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest)) - unloop (l, &irred_invalidated); + unloop (l, &irred_invalidated, NULL); } /* Identify the path. */ @@ -385,7 +391,7 @@ remove_path (edge e) /* Fix placements of basic blocks inside loops and the placement of loops in the loop tree. */ - fix_bb_placements (from, &irred_invalidated); + fix_bb_placements (from, &irred_invalidated, NULL); fix_loop_placements (from->loop_father, &irred_invalidated); if (irred_invalidated @@ -892,10 +898,14 @@ loopify (edge latch_edge, edge header_ed have no successor, which caller is expected to fix somehow. If this may cause the information about irreducible regions to become - invalid, IRRED_INVALIDATED is set to true. */ + invalid, IRRED_INVALIDATED is set to true. + + LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store + basic blocks that had non-trivial update on their loop_father.*/ void -unloop (struct loop *loop, bool *irred_invalidated) +unloop (struct loop *loop, bool *irred_invalidated, + bitmap loop_closed_ssa_invalidated) { basic_block *body; struct loop *ploop; @@ -937,7 +947,7 @@ unloop (struct loop *loop, bool *irred_i /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if there is an irreducible region inside the cancelled loop, the flags will be still correct. */ - fix_bb_placements (latch, &dummy); + fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated); } /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that @@ -965,7 +975,7 @@ fix_loop_placements (struct loop *loop, to the loop. So call fix_bb_placements to fix up the placement of the preheader and (possibly) of its predecessors. */ fix_bb_placements (loop_preheader_edge (loop)->src, - irred_invalidated); + irred_invalidated, NULL); loop = outer; } } Index: testsuite/gfortran.dg/pr54967.f90 =================================================================== --- testsuite/gfortran.dg/pr54967.f90 (revision 0) +++ testsuite/gfortran.dg/pr54967.f90 (revision 0) @@ -0,0 +1,36 @@ + SUBROUTINE calc_S_derivs() + INTEGER, DIMENSION(6, 2) :: c_map_mat + INTEGER, DIMENSION(:), POINTER:: C_mat + DO j=1,3 + DO m=j,3 + n=n+1 + c_map_mat(n,1)=j + IF(m==j)CYCLE + c_map_mat(n,2)=m + END DO + END DO + DO m=1,6 + DO j=1,2 + IF(c_map_mat(m,j)==0)CYCLE + CALL foo(C_mat(c_map_mat(m,j))) + END DO + END DO + END SUBROUTINE calc_S_derivs