On Fri, 15 Nov 2024, Richard Sandiford wrote: > Richard Biener <rguent...@suse.de> writes: > > The following ensures that peeling a single iteration for gaps is > > sufficient by enforcing niter masking (partial vector use) given > > we cannot (always) statically decide when the vector size isn't known. > > The condition guarding this and thus statically giving a pass in > > some cases for VL vectors is questionable, the patch doesn't address > > this. > > > > This fixes a set of known failout from enabling > > --param vect-force-slp=1 by default. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > PR tree-optimization/117558 > > * tree-vectorizer.h (_loop_vec_info::must_use_partial_vectors_p): New. > > (LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P): Likewise. > > * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize > > must_use_partial_vectors_p. > > (vect_determine_partial_vectors_and_peeling): Enforce it. > > (vect_analyze_loop_2): Reset before restarting. > > * tree-vect-stmts.cc (get_group_load_store_type): When peeling > > a single gap iteration cannot be determined safe statically > > enforce the use of partial vectors. > > LGTM. Just to make sure I understand... > > > --- > > gcc/tree-vect-loop.cc | 13 ++++++++++++- > > gcc/tree-vect-stmts.cc | 24 +++++++++++++++++++----- > > gcc/tree-vectorizer.h | 4 ++++ > > 3 files changed, 35 insertions(+), 6 deletions(-) > > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > index c67248e851d..18c4fa1d000 100644 > > --- a/gcc/tree-vect-loop.cc > > +++ b/gcc/tree-vect-loop.cc > > @@ -1059,6 +1059,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, > > vec_info_shared *shared) > > inner_loop_cost_factor (param_vect_inner_loop_cost_factor), > > vectorizable (false), > > can_use_partial_vectors_p (param_vect_partial_vector_usage != 0), > > + must_use_partial_vectors_p (false), > > using_partial_vectors_p (false), > > using_decrementing_iv_p (false), > > using_select_vl_p (false), > > @@ -2679,7 +2680,10 @@ vect_determine_partial_vectors_and_peeling > > (loop_vec_info loop_vinfo) > > LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; > > LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; > > if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) > > - && need_peeling_or_partial_vectors_p) > > + && LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo)) > > + LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; > > + else if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) > > + && need_peeling_or_partial_vectors_p) > > { > > /* For partial-vector-usage=1, try to push the handling of partial > > vectors to the epilogue, with the main loop continuing to operate > > @@ -2702,6 +2706,12 @@ vect_determine_partial_vectors_and_peeling > > (loop_vec_info loop_vinfo) > > LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; > > } > > > > + if (LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) > > + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) > > + return opt_result::failure_at (vect_location, > > + "not vectorized: loop needs but cannot " > > + "use partial vectors\n"); > > + > > if (dump_enabled_p ()) > > dump_printf_loc (MSG_NOTE, vect_location, > > "operating on %s vectors%s.\n", > > @@ -3387,6 +3397,7 @@ again: > > LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0; > > LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) > > = saved_can_use_partial_vectors_p; > > + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; > > LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; > > if (loop_vinfo->scan_map) > > loop_vinfo->scan_map->empty (); > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > > index 458056dd13d..f4a4d5a554c 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -2202,11 +2202,25 @@ get_group_load_store_type (vec_info *vinfo, > > stmt_vec_info stmt_info, > > (vectype, cnunits / cpart_size, > > &half_vtype) == NULL_TREE))) > > { > > - if (dump_enabled_p ()) > > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > - "peeling for gaps insufficient for " > > - "access\n"); > > - return false; > > + /* If all fails we can still resort to niter masking, so > > + enforce the use of partial vectors. */ > > + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) > > + { > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > + "peeling for gaps insufficient for " > > + "access unless using partial " > > + "vectors\n"); > > + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; > > + } > > + else > > + { > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > + "peeling for gaps insufficient for " > > + "access\n"); > > + return false; > > + } > > ...is this a compile-time optimisation? I.e. CAN_USE_PARTIAL_VECTORS_P > mustn't ever go from false to true, so if it's already false, there's no > point continuing?
Yes, also there's targets for which CAN_USE_PARTIAL_VECTORS_P is false from the start, so the message in this case is more to the point. Richard. > Richard > > > } > > } > > } > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > > index 273e8c644e7..d85dd594094 100644 > > --- a/gcc/tree-vectorizer.h > > +++ b/gcc/tree-vectorizer.h > > @@ -913,6 +913,9 @@ public: > > fewer than VF scalars. */ > > bool can_use_partial_vectors_p; > > > > + /* Records whether we must use niter masking for correctness reasons. */ > > + bool must_use_partial_vectors_p; > > + > > /* True if we've decided to use partially-populated vectors, so that > > the vector loop can handle fewer than VF scalars. */ > > bool using_partial_vectors_p; > > @@ -1051,6 +1054,7 @@ public: > > #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold > > #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable > > #define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) > > (L)->can_use_partial_vectors_p > > +#define LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P(L) > > (L)->can_use_partial_vectors_p > > #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p > > #define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p > > #define LOOP_VINFO_USING_SELECT_VL_P(L) (L)->using_select_vl_p > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)