This avoids loop_version () calls when if-conversion has already versioned the loop, and simplifies vect_loop_versioning because we need not do as much fixup. There is follow-up work to do for the profile scaling. Honza - any suggestions on how to apply a different true/false profile to an existing condition and its branches?
I've built SPEC 2006 with this and see 132 loop versions re-used from 3254 versionings done by the vectorizer (most loops do not need if-conversion). Bootstrap / regtest running on x86_64-unknown-linux-gnu. Richard. * tree-vectorizer.h (vect_loop_vectorized_call): Declare. * tree-vectorizer.c (vect_loop_vectorized_call): Export and also return the condition stmt. * tree-vect-loop-manip.c (vect_loop_versioning): Reuse the loop version created by if-conversion instead of versioning again. diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index b3fae5ba4da..be4b95a14a1 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -3032,7 +3032,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo, vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr); if (cond_expr) - cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list, + cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), + &cond_expr_stmt_list, is_gimple_condexpr, NULL_TREE); if (version_align) @@ -3076,45 +3077,77 @@ vect_loop_versioning (loop_vec_info loop_vinfo, is_gimple_condexpr, NULL_TREE); gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); - initialize_original_copy_tables (); if (scalar_loop) { - edge scalar_e; - basic_block preheader, scalar_preheader; + gcond *cond; + gimple *call; + if (!(call = vect_loop_vectorized_call (scalar_loop, &cond))) + gcc_unreachable (); + condition_bb = gimple_bb (cond); + gimple_cond_set_condition_from_tree (cond, cond_expr); + update_stmt (cond); - /* We don't want to scale SCALAR_LOOP's frequencies, we need to - scale LOOP's frequencies instead. */ - nloop = loop_version (scalar_loop, cond_expr, &condition_bb, - prob, prob.invert (), prob, prob.invert (), true); - scale_loop_frequencies (loop, prob); - /* CONDITION_BB was created above SCALAR_LOOP's preheader, - while we need to move it above LOOP's preheader. 
*/ - e = loop_preheader_edge (loop); - scalar_e = loop_preheader_edge (scalar_loop); - /* The vector loop preheader might not be empty, since new - invariants could have been created while analyzing the loop. */ - gcc_assert (single_pred_p (e->src)); - gcc_assert (empty_block_p (scalar_e->src) - && single_pred_p (scalar_e->src)); - gcc_assert (single_pred_p (condition_bb)); - preheader = e->src; - scalar_preheader = scalar_e->src; - scalar_e = find_edge (condition_bb, scalar_preheader); - e = single_pred_edge (preheader); - redirect_edge_and_branch_force (single_pred_edge (condition_bb), - scalar_preheader); - redirect_edge_and_branch_force (scalar_e, preheader); - redirect_edge_and_branch_force (e, condition_bb); - set_immediate_dominator (CDI_DOMINATORS, condition_bb, - single_pred (condition_bb)); - set_immediate_dominator (CDI_DOMINATORS, scalar_preheader, - single_pred (scalar_preheader)); - set_immediate_dominator (CDI_DOMINATORS, preheader, - condition_bb); + if (cond_expr_stmt_list) + { + cond_exp_gsi = gsi_for_stmt (call); + gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, + GSI_SAME_STMT); + } + + /* ??? if-conversion uses profile_probability::always () but + prob below is profile_probability::likely (). */ + nloop = scalar_loop; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "reusing loop version created by if conversion\n"); } else - nloop = loop_version (loop, cond_expr, &condition_bb, - prob, prob.invert (), prob, prob.invert (), true); + { + initialize_original_copy_tables (); + nloop = loop_version (loop, cond_expr, &condition_bb, + prob, prob.invert (), prob, prob.invert (), true); + free_original_copy_tables (); + + if (cond_expr_stmt_list) + { + cond_exp_gsi = gsi_last_bb (condition_bb); + gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, + GSI_SAME_STMT); + } + + /* Loop versioning violates an assumption we try to maintain during + vectorization - that the loop exit block has a single predecessor. 
+ After versioning, the exit block of both loop versions is the same + basic block (i.e. it has two predecessors). Just in order to simplify + following transformations in the vectorizer, we fix this situation + here by adding a new (empty) block on the exit-edge of the loop, + with the proper loop-exit phis to maintain loop-closed-form. + If loop versioning wasn't done from loop, but scalar_loop instead, + merge_bb will have already just a single successor. */ + + merge_bb = single_exit (loop)->dest; + if (EDGE_COUNT (merge_bb->preds) >= 2) + { + gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2); + new_exit_bb = split_edge (single_exit (loop)); + new_exit_e = single_exit (loop); + e = EDGE_SUCC (new_exit_bb, 0); + + for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + tree new_res; + orig_phi = gsi.phi (); + new_res = copy_ssa_name (PHI_RESULT (orig_phi)); + new_phi = create_phi_node (new_res, new_exit_bb); + arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); + add_phi_arg (new_phi, arg, new_exit_e, + gimple_phi_arg_location_from_edge (orig_phi, e)); + adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); + } + } + + update_ssa (TODO_update_ssa); + } if (version_niter) { @@ -3141,48 +3174,6 @@ vect_loop_versioning (loop_vec_info loop_vinfo, "alignment\n"); } - free_original_copy_tables (); - - /* Loop versioning violates an assumption we try to maintain during - vectorization - that the loop exit block has a single predecessor. - After versioning, the exit block of both loop versions is the same - basic block (i.e. it has two predecessors). Just in order to simplify - following transformations in the vectorizer, we fix this situation - here by adding a new (empty) block on the exit-edge of the loop, - with the proper loop-exit phis to maintain loop-closed-form. - If loop versioning wasn't done from loop, but scalar_loop instead, - merge_bb will have already just a single successor. 
*/ - - merge_bb = single_exit (loop)->dest; - if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2) - { - gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2); - new_exit_bb = split_edge (single_exit (loop)); - new_exit_e = single_exit (loop); - e = EDGE_SUCC (new_exit_bb, 0); - - for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - tree new_res; - orig_phi = gsi.phi (); - new_res = copy_ssa_name (PHI_RESULT (orig_phi)); - new_phi = create_phi_node (new_res, new_exit_bb); - arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); - add_phi_arg (new_phi, arg, new_exit_e, - gimple_phi_arg_location_from_edge (orig_phi, e)); - adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi)); - } - } - - /* End loop-exit-fixes after versioning. */ - - if (cond_expr_stmt_list) - { - cond_exp_gsi = gsi_last_bb (condition_bb); - gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, - GSI_SAME_STMT); - } - update_ssa (TODO_update_ssa); return nloop; } diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 4f6c65faf64..325ef58722d 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -727,8 +727,8 @@ vect_free_loop_info_assumptions (struct loop *loop) /* If LOOP has been versioned during ifcvt, return the internal call guarding it. */ -static gimple * -vect_loop_vectorized_call (struct loop *loop) +gimple * +vect_loop_vectorized_call (struct loop *loop, gcond **cond) { basic_block bb = loop_preheader_edge (loop)->src; gimple *g; @@ -744,6 +744,8 @@ vect_loop_vectorized_call (struct loop *loop) while (1); if (g && gimple_code (g) == GIMPLE_COND) { + if (cond) + *cond = as_a <gcond *> (g); gimple_stmt_iterator gsi = gsi_for_stmt (g); gsi_prev (&gsi); if (!gsi_end_p (gsi)) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4db30ccc22b..6713b895091 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1650,5 +1650,7 @@ void vect_pattern_recog (vec_info *); /* In tree-vectorizer.c. 
*/ unsigned vectorize_loops (void); void vect_free_loop_info_assumptions (struct loop *); +gimple *vect_loop_vectorized_call (struct loop *, gcond **cond = NULL); + #endif /* GCC_TREE_VECTORIZER_H */