On Thu, 13 Jul 2023, Tamar Christina wrote:
> > e7ac2b5f3db55de3dbbab7bd2bfe08388f4ec533..cab82d7960e5be517bba2
> > 621f7f4
> > > 888e7bf3c295 100644
> > > --- a/gcc/cfgloop.h
> > > +++ b/gcc/cfgloop.h
> > > @@ -272,6 +272,14 @@ public:
> > > the basic-block from being collected but its index can still be
> > > reused. */
> > > basic_block former_header;
> > > +
> > > + /* The controlling loop IV for the current loop when vectorizing.
> > > This IV
> > > + controls the natural exits of the loop. */ edge GTY ((skip
> > > + (""))) vec_loop_iv;
> > > +
> > > + /* If the loop has multiple exits this structure contains the alternate
> > > + exits of the loop which are relevant for vectorization. */
> > > + vec<edge> GTY ((skip (""))) vec_loop_alt_exits;
> >
> > That's a quite heavy representation and as you say it's vectorizer
> > specific. May
> > I ask you to eliminate at _least_ vec_loop_alt_exits?
> > Are there not all exits in that vector? Note there's already the list of
> > exits and if
> > you have the canonical counting IV exit you can match against that to get
> > all
> > the others?
> >
>
> Sure, though that means some filtering whenever one iterates over the alt
> exits,
> not a problem though.
>
> > > /* Given LOOP this function generates a new copy of it and puts it
> > > on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
> > > @@ -1458,13 +1523,15 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class
> > loop *loop,
> > > edge exit, new_exit;
> > > bool duplicate_outer_loop = false;
> > >
> > > - exit = single_exit (loop);
> > > + exit = loop->vec_loop_iv;
> > > at_exit = (e == exit);
> > > if (!at_exit && e != loop_preheader_edge (loop))
> > > return NULL;
> > >
> > > if (scalar_loop == NULL)
> > > scalar_loop = loop;
> > > + else
> > > + vec_init_exit_info (scalar_loop);
> > >
> > > bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
> > > pbbs = bbs + 1;
> > > @@ -1490,13 +1557,17 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class
> > loop *loop,
> > > bbs[0] = preheader;
> > > new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
> > >
> > > - exit = single_exit (scalar_loop);
> > > + exit = scalar_loop->vec_loop_iv;
> > > copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
> > > &exit, 1, &new_exit, NULL,
> > > at_exit ? loop->latch : e->src, true);
> > > - exit = single_exit (loop);
> > > + exit = loop->vec_loop_iv;
> > > basic_block new_preheader = new_bbs[0];
> > >
> > > + /* Record the new loop exit information. new_loop doesn't have SCEV
> > data and
> > > + so we must initialize the exit information. */
> > > + vec_init_exit_info (new_loop);
> > > +
> >
> > You have a mapping of old to new BB so you should be able to
> > map old to new exit by mapping e->src/dest and looking up the new edge?
> >
> > The vec_loop_iv exit is mapped directly (new_exit).
> >
> > So I don't really understand what's missing there.
>
> But I don't have the mapping when the loop as versioned, e.g. by ifcvt. So
> in the cases
> where scalar_loop != loop in which case I still need them to match up.
>
> vect_loop_form_info is destroyed after analysis though and is not available
> during
> peeling. That's why we copy relevant information out in
> vect_create_loop_vinfo.
>
> But in general we only have 1 per loop as well, so it would be the same as
> using loop_vinfo.
>
> I could move it into loop_vinfo and then require you to pass the edges to the
> peeling function
> as you mentioned. This would solve the location we place them in, but still
> not sure what to do
> about versioned loops. Would need to get its main edge "somewhere", would
> another field in
> loop_vinfo be ok?
I suppose since we're having ->scalar_loop adding ->scalar_loop_iv_exit
is straight-forward indeed. As for matching them up I don't see how
you do that reliably right now? It might be even that the if-converted
loop has one of the exits removed as unreachable (since we run VN
on its body) ...
What I could see working (but ick) is to extend the contract between
if-conversion and vectorization and for example record corresponding exit
numbers in exits. We have conveniently (*cough*) unused edge->aux
for this. If you assign numbers to all edges of the original
loop the loop copies should inherit those (if I traced things
correctly - duplicate_block copies edge->aux but not bb->aux).
So in the vectorizer you could then match them up.
Richard.
> Cheers,
> Tamar
>
> > > + if (!loop->vec_loop_iv)
> > > + return opt_result::failure_at (vect_location,
> > > + "not vectorized:"
> > > + " could not determine main exit from"
> > > + " loop with multiple exits.\n");
> > > +
> > > /* Different restrictions apply when we are considering an inner-most
> > > loop,
> > > vs. an outer (nested) loop.
> > > (FORNOW. May want to relax some of these restrictions in the
> > > future). */
> > > @@ -3025,9 +3032,8 @@ start_over:
> > > if (dump_enabled_p ())
> > > dump_printf_loc (MSG_NOTE, vect_location, "epilog loop
> > > required\n");
> > > if (!vect_can_advance_ivs_p (loop_vinfo)
> > > - || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
> > > - single_exit (LOOP_VINFO_LOOP
> > > - (loop_vinfo))))
> > > + || !slpeel_can_duplicate_loop_p (loop_vinfo,
> > > + LOOP_VINFO_IV_EXIT (loop_vinfo)))
> > > {
> > > ok = opt_result::failure_at (vect_location,
> > > "not vectorized: can't create required "
> > > @@ -5964,7 +5970,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> > loop_vinfo,
> > > Store them in NEW_PHIS. */
> > > if (double_reduc)
> > > loop = outer_loop;
> > > - exit_bb = single_exit (loop)->dest;
> > > + exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
> > > exit_gsi = gsi_after_labels (exit_bb);
> > > reduc_inputs.create (slp_node ? vec_num : ncopies);
> > > for (unsigned i = 0; i < vec_num; i++)
> > > @@ -5980,7 +5986,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> > loop_vinfo,
> > > phi = create_phi_node (new_def, exit_bb);
> > > if (j)
> > > def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
> > > - SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
> > > + SET_PHI_ARG_DEF (phi, LOOP_VINFO_IV_EXIT (loop_vinfo)-
> > >dest_idx, def);
> > > new_def = gimple_convert (&stmts, vectype, new_def);
> > > reduc_inputs.quick_push (new_def);
> > > }
> > > @@ -10301,12 +10307,12 @@ vectorizable_live_operation (vec_info
> > *vinfo,
> > > lhs' = new_tree; */
> > >
> > > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> > > - basic_block exit_bb = single_exit (loop)->dest;
> > > + basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
> > > gcc_assert (single_pred_p (exit_bb));
> > >
> > > tree vec_lhs_phi = copy_ssa_name (vec_lhs);
> > > gimple *phi = create_phi_node (vec_lhs_phi, exit_bb);
> > > - SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, vec_lhs);
> > > + SET_PHI_ARG_DEF (phi, LOOP_VINFO_IV_EXIT (loop_vinfo)->dest_idx,
> > vec_lhs);
> > >
> > > gimple_seq stmts = NULL;
> > > tree new_tree;
> > > @@ -10829,7 +10835,8 @@ scale_profile_for_vect_loop (class loop *loop,
> > unsigned vf)
> > > scale_loop_frequencies (loop, p);
> > > }
> > >
> > > - edge exit_e = single_exit (loop);
> > > + edge exit_e = loop->vec_loop_iv;
> > > +
> > > exit_e->probability = profile_probability::always () / (new_est_niter
> > > + 1);
> > >
> > > edge exit_l = single_pred_edge (loop->latch);
> > > @@ -11177,7 +11184,7 @@ vect_transform_loop (loop_vec_info
> > loop_vinfo, gimple *loop_vectorized_call)
> > >
> > > /* Make sure there exists a single-predecessor exit bb. Do this before
> > > versioning. */
> > > - edge e = single_exit (loop);
> > > + edge e = LOOP_VINFO_IV_EXIT (loop_vinfo);
> > > if (! single_pred_p (e->dest))
> > > {
> > > split_loop_exit_edge (e, true);
> > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > > index
> > a36974c2c0d2103b0a2d0397d06ab84dace08129..bd5eceb5da7a45ef036c
> > d14609ebe091799320bf 100644
> > > --- a/gcc/tree-vectorizer.h
> > > +++ b/gcc/tree-vectorizer.h
> > > @@ -917,6 +917,8 @@ public:
> > >
> > > /* Access Functions. */
> > > #define LOOP_VINFO_LOOP(L) (L)->loop
> > > +#define LOOP_VINFO_IV_EXIT(L) (L)->loop->vec_loop_iv
> > > +#define LOOP_VINFO_ALT_EXITS(L) (L)->loop->vec_loop_alt_exits
> > > #define LOOP_VINFO_BBS(L) (L)->bbs
> > > #define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1
> > > #define LOOP_VINFO_NITERS(L) (L)->num_iters
> > > @@ -2162,6 +2164,7 @@ extern void vect_prepare_for_masked_peels
> > (loop_vec_info);
> > > extern dump_user_location_t find_loop_location (class loop *);
> > > extern bool vect_can_advance_ivs_p (loop_vec_info);
> > > extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
> > > +extern void vec_init_exit_info (class loop *);
> > >
> > > /* In tree-vect-stmts.cc. */
> > > extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
> >
> > So I didn't really see why we should need to have the info in
> > struct loop.
> >
> > Richard.
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)