> -----Original Message-----
> From: Richard Biener <rguent...@suse.de>
> Sent: Friday, July 4, 2025 10:42 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Sandiford <richard.sandif...@arm.com>; Tamar Christina
> <tamar.christ...@arm.com>
> Subject: [PATCH 1/2] Allow the target to request a masked vector epilogue
>
> Targets recently got the ability to request the vector mode to be
> used for a vector epilogue (or the epilogue of a vector epilogue). The
> following adds the ability for it to indicate the epilogue should use
> loop masking, irrespective of the --param vect-partial-vector-usage
> default setting.
>
> The patch below uses a separate flag from the epilogue mode, not
> addressing the issue that on x86 the vector_modes mode iteration
> hook would not allow for both masked and unmasked variants to be
> tried and costed given this doesn't naturally map to modes on
> that target. That's left for a future exercise - turning on
> cost comparison for the x86 backend would be a prerequisite there.
Looks reasonable to me. Seems like a useful addition when the
mode chosen can be both masked and unmasked.
Cheers,
Tamar
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> I'll wait on discussion on 2/2 which is x86 target specific.
>
> * tree-vectorizer.h (vector_costs::suggested_epilogue_mode):
> Add masked output parameter and return m_masked_epilogue.
> (vector_costs::m_masked_epilogue): New tristate flag.
> (vector_costs::vector_costs): Initialize m_masked_epilogue.
> * tree-vect-loop.cc (vect_analyze_loop_1): Pass in masked
> flag to optionally initialize can_use_partial_vectors_p.
> (vect_analyze_loop): For epilogues also get whether to use
> a masked epilogue for this loop from the target and use
> that for the first epilogue mode we try.
> ---
> gcc/tree-vect-loop.cc | 35 ++++++++++++++++++++++++++---------
> gcc/tree-vectorizer.h | 13 ++++++++++---
> 2 files changed, 36 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index fcdda6d6e8d..5c17b59e983 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
> #include "tree-eh.h"
> #include "case-cfn-macros.h"
> #include "langhooks.h"
> +#include "opts.h"
>
> /* Loop Vectorization Pass.
>
> @@ -3402,8 +3403,10 @@ vect_joust_loop_vinfos (loop_vec_info
> new_loop_vinfo,
> }
>
> /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if
> ORIG_LOOP_VINFO is
> - not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
> - MODE_I to the next mode useful to analyze.
> + not NULL. When MASKED_P is not -1 override the default
> + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it.
> + Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the
> next
> + mode useful to analyze.
> Return the loop_vinfo on success and wrapped null on failure. */
>
> static opt_loop_vec_info
> @@ -3411,6 +3414,7 @@ vect_analyze_loop_1 (class loop *loop,
> vec_info_shared *shared,
> const vect_loop_form_info *loop_form_info,
> loop_vec_info orig_loop_vinfo,
> const vector_modes &vector_modes, unsigned &mode_i,
> + int masked_p,
> machine_mode &autodetected_vector_mode,
> bool &fatal)
> {
> @@ -3419,6 +3423,8 @@ vect_analyze_loop_1 (class loop *loop,
> vec_info_shared *shared,
>
> machine_mode vector_mode = vector_modes[mode_i];
> loop_vinfo->vector_mode = vector_mode;
> + if (masked_p != -1)
> + loop_vinfo->can_use_partial_vectors_p = masked_p;
> unsigned int suggested_unroll_factor = 1;
> unsigned slp_done_for_suggested_uf = 0;
>
> @@ -3602,7 +3608,7 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
> cached_vf_per_mode[last_mode_i] = -1;
> opt_loop_vec_info loop_vinfo
> = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> - NULL, vector_modes, mode_i,
> + NULL, vector_modes, mode_i, -1,
> autodetected_vector_mode, fatal);
> if (fatal)
> break;
> @@ -3687,18 +3693,21 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
> array may contain length-agnostic and length-specific modes. Their
> ordering is not guaranteed, so we could end up picking a mode for the
> main
> loop that is after the epilogue's optimal mode. */
> + int masked_p = -1;
> if (!unlimited_cost_model (loop)
> - && first_loop_vinfo->vector_costs->suggested_epilogue_mode () !=
> VOIDmode)
> + && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
> + != VOIDmode))
> {
> vector_modes[0]
> - = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> + = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
> cached_vf_per_mode[0] = 0;
> }
> else
> vector_modes[0] = autodetected_vector_mode;
> mode_i = 0;
>
> - bool supports_partial_vectors = param_vect_partial_vector_usage != 0;
> + bool supports_partial_vectors = (param_vect_partial_vector_usage != 0
> + || masked_p == 1);
> machine_mode mask_mode;
> if (supports_partial_vectors
> && !partial_vectors_supported_p ()
> @@ -3712,6 +3721,10 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
> loop_vec_info orig_loop_vinfo = first_loop_vinfo;
> do
> {
> + /* Let the user override what the target suggests. */
> + if (OPTION_SET_P (param_vect_partial_vector_usage))
> + masked_p = -1;
> +
> while (1)
> {
> /* If the target does not support partial vectors we can shorten the
> @@ -3752,7 +3765,7 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
> opt_loop_vec_info loop_vinfo
> = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> orig_loop_vinfo,
> - vector_modes, mode_i,
> + vector_modes, mode_i, masked_p,
> autodetected_vector_mode, fatal);
> if (fatal)
> break;
> @@ -3783,6 +3796,9 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
> break;
> }
>
> + /* Revert back to the default from the suggested preferred
> + epilogue vectorization mode. */
> + masked_p = -1;
> if (mode_i == vector_modes.length ())
> break;
> }
> @@ -3793,13 +3809,14 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
>
> /* When we selected a first vectorized epilogue, see if the target
> suggests to have another one. */
> + masked_p = -1;
> if (!unlimited_cost_model (loop)
> && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (orig_loop_vinfo)
> - && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
> + && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode
> (masked_p)
> != VOIDmode))
> {
> vector_modes[0]
> - = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> + = orig_loop_vinfo->vector_costs->suggested_epilogue_mode
> (masked_p);
> cached_vf_per_mode[0] = 0;
> mode_i = 0;
> }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 66a29648fb4..d41b73f610f 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1714,7 +1714,7 @@ public:
> unsigned int outside_cost () const;
> unsigned int total_cost () const;
> unsigned int suggested_unroll_factor () const;
> - machine_mode suggested_epilogue_mode () const;
> + machine_mode suggested_epilogue_mode (int &masked) const;
>
> protected:
> unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
> @@ -1738,8 +1738,13 @@ protected:
> unsigned int m_suggested_unroll_factor;
>
> /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
> - determined at finish_cost. */
> + determined at finish_cost. m_masked_epilogue says whether the epilogue should use
> + masked vectorization, regardless of the --param
> vect-partial-vector-usage
> + default. If -1 then the --param setting takes precedence. If the
> + user explicitly specified --param vect-partial-vector-usage then that
> + takes precedence. */
> machine_mode m_suggested_epilogue_mode;
> + int m_masked_epilogue;
>
> /* True if finish_cost has been called. */
> bool m_finished;
> @@ -1755,6 +1760,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool
> costing_for_scalar)
> m_costs (),
> m_suggested_unroll_factor(1),
> m_suggested_epilogue_mode(VOIDmode),
> + m_masked_epilogue (-1),
> m_finished (false)
> {
> }
> @@ -1815,9 +1821,10 @@ vector_costs::suggested_unroll_factor () const
> /* Return the suggested epilogue mode. */
>
> inline machine_mode
> -vector_costs::suggested_epilogue_mode () const
> +vector_costs::suggested_epilogue_mode (int &masked_p) const
> {
> gcc_checking_assert (m_finished);
> + masked_p = m_masked_epilogue;
> return m_suggested_epilogue_mode;
> }
>
> --
> 2.43.0