> -----Original Message-----
> From: Richard Biener <rguent...@suse.de>
> Sent: Friday, July 4, 2025 10:42 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Sandiford <richard.sandif...@arm.com>; Tamar Christina
> <tamar.christ...@arm.com>
> Subject: [PATCH 1/2] Allow the target to request a masked vector epilogue
> 
> Targets recently got the ability to request the vector mode to be
> used for a vector epilogue (or the epilogue of a vector epilogue).  The
> following adds the ability for it to indicate the epilogue should use
> loop masking, irrespective of the --param vect-partial-vector-usage
> default setting.
> 
> The patch below uses a separate flag from the epilogue mode, not
> addressing the issue that on x86 the vector_modes mode iteration
> hook would not allow for both masked and unmasked variants to be
> tried and costed given this doesn't naturally map to modes on
> that target.  That's left for a future exercise - turning on
> cost comparison for the x86 backend would be a prerequisite there.

Looks reasonable to me.  Seems like a useful addition when the
mode chosen can be both masked and unmasked.

Cheers,
Tamar

> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> OK?
> 
> I'll wait on discussion on 2/2 which is x86 target specific.
> 
>       * tree-vectorizer.h (vector_costs::suggested_epilogue_mode):
>       Add masked output parameter and return m_masked_epilogue.
>       (vector_costs::m_masked_epilogue): New tristate flag.
>       (vector_costs::vector_costs): Initialize m_masked_epilogue.
>       * tree-vect-loop.cc (vect_analyze_loop_1): Pass in masked
>       flag to optionally initialize can_use_partial_vectors_p.
>       (vect_analyze_loop): For epilogues also get whether to use
>       a masked epilogue for this loop from the target and use
>       that for the first epilogue mode we try.
> ---
>  gcc/tree-vect-loop.cc | 35 ++++++++++++++++++++++++++---------
>  gcc/tree-vectorizer.h | 13 ++++++++++---
>  2 files changed, 36 insertions(+), 12 deletions(-)
> 
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index fcdda6d6e8d..5c17b59e983 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-eh.h"
>  #include "case-cfn-macros.h"
>  #include "langhooks.h"
> +#include "opts.h"
> 
>  /* Loop Vectorization Pass.
> 
> @@ -3402,8 +3403,10 @@ vect_joust_loop_vinfos (loop_vec_info
> new_loop_vinfo,
>  }
> 
>  /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if
> ORIG_LOOP_VINFO is
> -   not NULL.  Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
> -   MODE_I to the next mode useful to analyze.
> +   not NULL.  When MASKED_P is not -1 override the default
> +   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it.
> +   Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the
> next
> +   mode useful to analyze.
>     Return the loop_vinfo on success and wrapped null on failure.  */
> 
>  static opt_loop_vec_info
> @@ -3411,6 +3414,7 @@ vect_analyze_loop_1 (class loop *loop,
> vec_info_shared *shared,
>                    const vect_loop_form_info *loop_form_info,
>                    loop_vec_info orig_loop_vinfo,
>                    const vector_modes &vector_modes, unsigned &mode_i,
> +                  int masked_p,
>                    machine_mode &autodetected_vector_mode,
>                    bool &fatal)
>  {
> @@ -3419,6 +3423,8 @@ vect_analyze_loop_1 (class loop *loop,
> vec_info_shared *shared,
> 
>    machine_mode vector_mode = vector_modes[mode_i];
>    loop_vinfo->vector_mode = vector_mode;
> +  if (masked_p != -1)
> +    loop_vinfo->can_use_partial_vectors_p = masked_p;
>    unsigned int suggested_unroll_factor = 1;
>    unsigned slp_done_for_suggested_uf = 0;
> 
> @@ -3602,7 +3608,7 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
>        cached_vf_per_mode[last_mode_i] = -1;
>        opt_loop_vec_info loop_vinfo
>       = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> -                            NULL, vector_modes, mode_i,
> +                            NULL, vector_modes, mode_i, -1,
>                              autodetected_vector_mode, fatal);
>        if (fatal)
>       break;
> @@ -3687,18 +3693,21 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
>       array may contain length-agnostic and length-specific modes.  Their
>       ordering is not guaranteed, so we could end up picking a mode for the 
> main
>       loop that is after the epilogue's optimal mode.  */
> +  int masked_p = -1;
>    if (!unlimited_cost_model (loop)
> -      && first_loop_vinfo->vector_costs->suggested_epilogue_mode () !=
> VOIDmode)
> +      && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
> +       != VOIDmode))
>      {
>        vector_modes[0]
> -     = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> +     = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
>        cached_vf_per_mode[0] = 0;
>      }
>    else
>      vector_modes[0] = autodetected_vector_mode;
>    mode_i = 0;
> 
> -  bool supports_partial_vectors = param_vect_partial_vector_usage != 0;
> +  bool supports_partial_vectors = (param_vect_partial_vector_usage != 0
> +                                || masked_p == 1);
>    machine_mode mask_mode;
>    if (supports_partial_vectors
>        && !partial_vectors_supported_p ()
> @@ -3712,6 +3721,10 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
>    loop_vec_info orig_loop_vinfo = first_loop_vinfo;
>    do
>      {
> +      /* Let the user override what the target suggests.  */
> +      if (OPTION_SET_P (param_vect_partial_vector_usage))
> +     masked_p = -1;
> +
>        while (1)
>       {
>         /* If the target does not support partial vectors we can shorten the
> @@ -3752,7 +3765,7 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
>         opt_loop_vec_info loop_vinfo
>           = vect_analyze_loop_1 (loop, shared, &loop_form_info,
>                                  orig_loop_vinfo,
> -                                vector_modes, mode_i,
> +                                vector_modes, mode_i, masked_p,
>                                  autodetected_vector_mode, fatal);
>         if (fatal)
>           break;
> @@ -3783,6 +3796,9 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
>               break;
>           }
> 
> +       /* Revert back to the default from the suggested preferred
> +          epilogue vectorization mode.  */
> +       masked_p = -1;
>         if (mode_i == vector_modes.length ())
>           break;
>       }
> @@ -3793,13 +3809,14 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
> 
>        /* When we selected a first vectorized epilogue, see if the target
>        suggests to have another one.  */
> +      masked_p = -1;
>        if (!unlimited_cost_model (loop)
>         && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (orig_loop_vinfo)
> -       && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
> +       && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode
> (masked_p)
>             != VOIDmode))
>       {
>         vector_modes[0]
> -         = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> +         = orig_loop_vinfo->vector_costs->suggested_epilogue_mode
> (masked_p);
>         cached_vf_per_mode[0] = 0;
>         mode_i = 0;
>       }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 66a29648fb4..d41b73f610f 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1714,7 +1714,7 @@ public:
>    unsigned int outside_cost () const;
>    unsigned int total_cost () const;
>    unsigned int suggested_unroll_factor () const;
> -  machine_mode suggested_epilogue_mode () const;
> +  machine_mode suggested_epilogue_mode (int &masked) const;
> 
>  protected:
>    unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
> @@ -1738,8 +1738,13 @@ protected:
>    unsigned int m_suggested_unroll_factor;
> 
>    /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
> -     determined at finish_cost.  */
> +     determined at finish_cost.  m_masked_epilogue says whether the
> +     epilogue should use masked vectorization, regardless of the --param
> +     vect-partial-vector-usage default.  If -1 then the --param setting
> +     takes precedence.  If the user explicitly specified --param
> +     vect-partial-vector-usage then that takes precedence.  */
>    machine_mode m_suggested_epilogue_mode;
> +  int m_masked_epilogue;
> 
>    /* True if finish_cost has been called.  */
>    bool m_finished;
> @@ -1755,6 +1760,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool
> costing_for_scalar)
>      m_costs (),
>      m_suggested_unroll_factor(1),
>      m_suggested_epilogue_mode(VOIDmode),
> +    m_masked_epilogue (-1),
>      m_finished (false)
>  {
>  }
> @@ -1815,9 +1821,10 @@ vector_costs::suggested_unroll_factor () const
>  /* Return the suggested epilogue mode.  */
> 
>  inline machine_mode
> -vector_costs::suggested_epilogue_mode () const
> +vector_costs::suggested_epilogue_mode (int &masked_p) const
>  {
>    gcc_checking_assert (m_finished);
> +  masked_p = m_masked_epilogue;
>    return m_suggested_epilogue_mode;
>  }
> 
> --
> 2.43.0

Reply via email to