On Fri, 24 Nov 2023, Tamar Christina wrote: > Hi, > > Having simplified peeling this patch becomes smaller as well: > > This changes the PHI node updates to support early breaks. > It has to support both the case where the loop's exit matches the normal loop > exit and one where the early exit is "inverted", i.e. it's an early exit edge. > > In the latter case we must always restart the loop for VF iterations. For an > early exit the reason is obvious, but there are cases where the "normal" exit > is located before the early one. This exit then does a check on ivtmp > resulting > in us leaving the loop since it thinks we're done. > > In these cases we may still have side-effects to perform so we also go to the > scalar loop. > > For the "normal" exit niters has already been adjusted for peeling, for the > early exits we must find out how many iterations we actually did. So we have > to recalculate the new position for each exit. > > For the "inverse" case we essentially peel a vector iteration *after* the > vector > loop has finished. i.e. conceptually it's the same as vect epilogue peeling > but > without generating code for the peeled iteration. That'll be handled by the > scalar loop. > > To do this we just adjust niters_vector_mult_vf and remove one VF and for > masked > cases we do the same with final_iv. > > The normal IV update code will then generate the correct values for us. > Eventually VRP will simplify the constant bounds and we get the proper scalar > unrolling. This means we don't have to make any changes at all to > vect_update_ivs_after_vectorizer beyond dropping some asserts. > > Ok for master?
Nice. OK. Thanks, Richard. > Thanks, > Tamar > > gcc/ChangeLog: > > * tree-vect-loop-manip.cc (vect_set_loop_condition_partial_vectors, > vect_set_loop_condition_partial_vectors_avx512, > vect_gen_vector_loop_niters_mult_vf): Support peeling a vector > iteration. > (vect_update_ivs_after_vectorizer): Drop asserts. > (vect_do_peeling): Skip forwarder edge. > (vect_is_loop_exit_latch_pred): New. > * tree-vectorizer.h (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED): New. > > --- inline copy of patch --- > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > index > d61d7c3a189b279fc3bcbb58c3c0e32521db3cf8..476be8a0bb6da2d06c4ca7052cb07bacecca60b1 > 100644 > --- a/gcc/tree-vect-loop-manip.cc > +++ b/gcc/tree-vect-loop-manip.cc > @@ -951,7 +951,18 @@ vect_set_loop_condition_partial_vectors (class loop > *loop, edge exit_edge, > > if (final_iv) > { > - gassign *assign = gimple_build_assign (final_iv, orig_niters); > + gassign *assign; > + /* If vectorizing an inverted early break loop we have to restart the > + scalar loop at niters - vf. This matches what we do in > + vect_gen_vector_loop_niters_mult_vf for non-masked loops. */ > + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) > + { > + tree ftype = TREE_TYPE (orig_niters); > + tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); > + assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf); > + } > + else > + assign = gimple_build_assign (final_iv, orig_niters); > gsi_insert_on_edge_immediate (exit_edge, assign); > } > > @@ -1188,8 +1199,19 @@ vect_set_loop_condition_partial_vectors_avx512 (class > loop *loop, > > if (final_iv) > { > - gassign *assign = gimple_build_assign (final_iv, orig_niters); > - gsi_insert_on_edge_immediate (single_exit (loop), assign); > + gassign *assign; > + /* If vectorizing an inverted early break loop we have to restart the > + scalar loop at niters - vf. This matches what we do in > + vect_gen_vector_loop_niters_mult_vf for non-masked loops. 
*/ > + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) > + { > + tree ftype = TREE_TYPE (orig_niters); > + tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); > + assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf); > + } > + else > + assign = gimple_build_assign (final_iv, orig_niters); > + gsi_insert_on_edge_immediate (exit_edge, assign); > } > > return cond_stmt; > @@ -2157,11 +2179,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info > loop_vinfo, > gphi_iterator gsi, gsi1; > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > basic_block update_bb = update_e->dest; > - > basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; > - > - /* Make sure there exists a single-predecessor exit bb: */ > - gcc_assert (single_pred_p (exit_bb)); > + gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb); > > for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis > (update_bb); > !gsi_end_p (gsi) && !gsi_end_p (gsi1); > @@ -2171,7 +2190,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info > loop_vinfo, > tree step_expr, off; > tree type; > tree var, ni, ni_name; > - gimple_stmt_iterator last_gsi; > > gphi *phi = gsi.phi (); > gphi *phi1 = gsi1.phi (); > @@ -2207,7 +2225,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info > loop_vinfo, > { > tree stype = TREE_TYPE (step_expr); > off = fold_build2 (MULT_EXPR, stype, > - fold_convert (stype, niters), step_expr); > + fold_convert (stype, niters), step_expr); > + > if (POINTER_TYPE_P (type)) > ni = fold_build_pointer_plus (init_expr, off); > else > @@ -2226,9 +2245,9 @@ vect_update_ivs_after_vectorizer (loop_vec_info > loop_vinfo, > > var = create_tmp_var (type, "tmp"); > > - last_gsi = gsi_last_bb (exit_bb); > gimple_seq new_stmts = NULL; > ni_name = force_gimple_operand (ni, &new_stmts, false, var); > + > /* Exit_bb shouldn't be empty. 
*/ > if (!gsi_end_p (last_gsi)) > { > @@ -2726,11 +2745,19 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info > loop_vinfo, > int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant (); > tree type = TREE_TYPE (niters_vector); > tree log_vf = build_int_cst (type, exact_log2 (vf)); > + tree tree_vf = build_int_cst (type, vf); > basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; > > gcc_assert (niters_vector_mult_vf_ptr != NULL); > tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type, > niters_vector, log_vf); > + > + /* If we've peeled a vector iteration then subtract one full vector > + iteration. */ > + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) > + niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type, > + niters_vector_mult_vf, tree_vf); > + > if (!is_gimple_val (niters_vector_mult_vf)) > { > tree var = create_tmp_var (type, "niters_vector_mult_vf"); > @@ -3328,6 +3355,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree > niters, tree nitersm1, > niters_vector_mult_vf steps. */ > gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); > update_e = skip_vector ? e : loop_preheader_edge (epilog); > + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > + update_e = single_succ_edge (e->dest); > + > + /* Update the main exit. 
*/ > vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf, > update_e); > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index > 39aa4d1250efe308acccf484d370f8adfd1ba843..de60da31e2a3030a7fbc302d3f676af9683fd019 > 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -1016,6 +1016,8 @@ public: > #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps > #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter > #define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks > +#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \ > + (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src) > #define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict > #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb > #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses > @@ -2224,6 +2226,7 @@ extern dump_user_location_t find_loop_location (class > loop *); > extern bool vect_can_advance_ivs_p (loop_vec_info); > extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); > extern edge vec_init_loop_exit_info (class loop *); > +extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool > *); > > /* In tree-vect-stmts.cc. */ > extern tree get_related_vectype_for_scalar_type (machine_mode, tree, > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)