On Fri, 15 Nov 2024, Richard Sandiford wrote:

> Richard Biener <rguent...@suse.de> writes:
> > The following ensures that peeling a single iteration for gaps is
> > sufficient by enforcing niter masking (partial vector use) given
> > we cannot (always) statically decide when the vector size isn't known.
> > The condition guarding this and thus statically giving a pass in
> > some cases for VL vectors is questionable, the patch doesn't address
> > this.
> >
> > This fixes a set of known failout from enabling
> > --param vect-force-slp=1 by default.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> >     PR tree-optimization/117558
> >     * tree-vectorizer.h (_loop_vec_info::must_use_partial_vectors_p): New.
> >     (LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P): Likewise.
> >     * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize
> >     must_use_partial_vectors_p.
> >     (vect_determine_partial_vectors_and_peeling): Enforce it.
> >     (vect_analyze_loop_2): Reset before restarting.
> >     * tree-vect-stmts.cc (get_group_load_store_type): When peeling
> >     a single gap iteration cannot be determined safe statically
> >     enforce the use of partial vectors.
> 
> LGTM.  Just to make sure I understand...
> 
> > ---
> >  gcc/tree-vect-loop.cc  | 13 ++++++++++++-
> >  gcc/tree-vect-stmts.cc | 24 +++++++++++++++++++-----
> >  gcc/tree-vectorizer.h  |  4 ++++
> >  3 files changed, 35 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index c67248e851d..18c4fa1d000 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -1059,6 +1059,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
> > vec_info_shared *shared)
> >      inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
> >      vectorizable (false),
> >      can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
> > +    must_use_partial_vectors_p (false),
> >      using_partial_vectors_p (false),
> >      using_decrementing_iv_p (false),
> >      using_select_vl_p (false),
> > @@ -2679,7 +2680,10 @@ vect_determine_partial_vectors_and_peeling 
> > (loop_vec_info loop_vinfo)
> >    LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
> >    LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
> >    if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> > -      && need_peeling_or_partial_vectors_p)
> > +      && LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo))
> > +    LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
> > +  else if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> > +      && need_peeling_or_partial_vectors_p)
> >      {
> >        /* For partial-vector-usage=1, try to push the handling of partial
> >      vectors to the epilogue, with the main loop continuing to operate
> > @@ -2702,6 +2706,12 @@ vect_determine_partial_vectors_and_peeling 
> > (loop_vec_info loop_vinfo)
> >     LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
> >      }
> >  
> > +  if (LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo)
> > +      && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
> > +    return opt_result::failure_at (vect_location,
> > +                              "not vectorized: loop needs but cannot "
> > +                              "use partial vectors\n");
> > +
> >    if (dump_enabled_p ())
> >      dump_printf_loc (MSG_NOTE, vect_location,
> >                  "operating on %s vectors%s.\n",
> > @@ -3387,6 +3397,7 @@ again:
> >    LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
> >    LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> >      = saved_can_use_partial_vectors_p;
> > +  LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
> >    LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
> >    if (loop_vinfo->scan_map)
> >      loop_vinfo->scan_map->empty ();
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index 458056dd13d..f4a4d5a554c 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -2202,11 +2202,25 @@ get_group_load_store_type (vec_info *vinfo, 
> > stmt_vec_info stmt_info,
> >                            (vectype, cnunits / cpart_size,
> >                             &half_vtype) == NULL_TREE)))
> >             {
> > -             if (dump_enabled_p ())
> > -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > -                                "peeling for gaps insufficient for "
> > -                                "access\n");
> > -             return false;
> > +             /* If all fails we can still resort to niter masking, so
> > +                enforce the use of partial vectors.  */
> > +             if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> > +               {
> > +                 if (dump_enabled_p ())
> > +                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                                    "peeling for gaps insufficient for "
> > +                                    "access unless using partial "
> > +                                    "vectors\n");
> > +                 LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
> > +               }
> > +             else
> > +               {
> > +                 if (dump_enabled_p ())
> > +                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                                    "peeling for gaps insufficient for "
> > +                                    "access\n");
> > +                 return false;
> > +               }
> 
> ...is this a compile-time optimisation?  I.e. CAN_USE_PARTIAL_VECTORS_P
> mustn't ever go from false to true, so if it's already false, there's no
> point continuing?

Yes, also there's targets for which CAN_USE_PARTIAL_VECTORS_P is false
from the start, so the message in this case is more to the point.

Richard.

> Richard
> 
> >             }
> >         }
> >     }
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index 273e8c644e7..d85dd594094 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -913,6 +913,9 @@ public:
> >       fewer than VF scalars.  */
> >    bool can_use_partial_vectors_p;
> >  
> > +  /* Records whether we must use niter masking for correctness reasons.  */
> > +  bool must_use_partial_vectors_p;
> > +
> >    /* True if we've decided to use partially-populated vectors, so that
> >       the vector loop can handle fewer than VF scalars.  */
> >    bool using_partial_vectors_p;
> > @@ -1051,6 +1054,7 @@ public:
> >  #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
> >  #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
> >  #define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) 
> > (L)->can_use_partial_vectors_p
> > +#define LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P(L) 
> > (L)->can_use_partial_vectors_p
> >  #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
> >  #define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
> >  #define LOOP_VINFO_USING_SELECT_VL_P(L) (L)->using_select_vl_p
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to