Targets recently got the ability to request the vector mode to be
used for a vector epilogue (or the epilogue of a vector epilogue).  The
following adds the ability for a target to also indicate that the epilogue
should use loop masking, irrespective of the --param
vect-partial-vector-usage default setting.

The patch below uses a flag separate from the epilogue mode.  It does
not address the issue that on x86 the vector_modes mode iteration
hook would not allow for both masked and unmasked variants to be
tried and costed, given that this doesn't naturally map to modes on
that target.  That's left for a future exercise - turning on
cost comparison for the x86 backend would be a prerequisite there.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

I'll wait on discussion on 2/2 which is x86 target specific.

        * tree-vectorizer.h (vector_costs::suggested_epilogue_mode):
        Add masked output parameter and return m_masked_epilogue.
        (vector_costs::m_masked_epilogue): New tristate flag.
        (vector_costs::vector_costs): Initialize m_masked_epilogue.
        * tree-vect-loop.cc (vect_analyze_loop_1): Pass in masked
        flag to optionally initialize can_use_partial_vectors_p.
        (vect_analyze_loop): For epilogues also get whether to use
        a masked epilogue for this loop from the target and use
        that for the first epilogue mode we try.
---
 gcc/tree-vect-loop.cc | 35 ++++++++++++++++++++++++++---------
 gcc/tree-vectorizer.h | 13 ++++++++++---
 2 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index fcdda6d6e8d..5c17b59e983 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-eh.h"
 #include "case-cfn-macros.h"
 #include "langhooks.h"
+#include "opts.h"
 
 /* Loop Vectorization Pass.
 
@@ -3402,8 +3403,10 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
 }
 
 /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is
-   not NULL.  Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
-   MODE_I to the next mode useful to analyze.
+   not NULL.  When MASKED_P is not -1 override the default
+   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it.
+   Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next
+   mode useful to analyze.
    Return the loop_vinfo on success and wrapped null on failure.  */
 
 static opt_loop_vec_info
@@ -3411,6 +3414,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
                     const vect_loop_form_info *loop_form_info,
                     loop_vec_info orig_loop_vinfo,
                     const vector_modes &vector_modes, unsigned &mode_i,
+                    int masked_p,
                     machine_mode &autodetected_vector_mode,
                     bool &fatal)
 {
@@ -3419,6 +3423,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
 
   machine_mode vector_mode = vector_modes[mode_i];
   loop_vinfo->vector_mode = vector_mode;
+  if (masked_p != -1)
+    loop_vinfo->can_use_partial_vectors_p = masked_p;
   unsigned int suggested_unroll_factor = 1;
   unsigned slp_done_for_suggested_uf = 0;
 
@@ -3602,7 +3608,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
       cached_vf_per_mode[last_mode_i] = -1;
       opt_loop_vec_info loop_vinfo
        = vect_analyze_loop_1 (loop, shared, &loop_form_info,
-                              NULL, vector_modes, mode_i,
+                              NULL, vector_modes, mode_i, -1,
                               autodetected_vector_mode, fatal);
       if (fatal)
        break;
@@ -3687,18 +3693,21 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
      array may contain length-agnostic and length-specific modes.  Their
      ordering is not guaranteed, so we could end up picking a mode for the main
      loop that is after the epilogue's optimal mode.  */
+  int masked_p = -1;
   if (!unlimited_cost_model (loop)
-      && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != VOIDmode)
+      && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
+         != VOIDmode))
     {
       vector_modes[0]
-       = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+       = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
       cached_vf_per_mode[0] = 0;
     }
   else
     vector_modes[0] = autodetected_vector_mode;
   mode_i = 0;
 
-  bool supports_partial_vectors = param_vect_partial_vector_usage != 0;
+  bool supports_partial_vectors = (param_vect_partial_vector_usage != 0
+                                  || masked_p == 1);
   machine_mode mask_mode;
   if (supports_partial_vectors
       && !partial_vectors_supported_p ()
@@ -3712,6 +3721,10 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
   loop_vec_info orig_loop_vinfo = first_loop_vinfo;
   do
     {
+      /* Let the user override what the target suggests.  */
+      if (OPTION_SET_P (param_vect_partial_vector_usage))
+       masked_p = -1;
+
       while (1)
        {
          /* If the target does not support partial vectors we can shorten the
@@ -3752,7 +3765,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
          opt_loop_vec_info loop_vinfo
            = vect_analyze_loop_1 (loop, shared, &loop_form_info,
                                   orig_loop_vinfo,
-                                  vector_modes, mode_i,
+                                  vector_modes, mode_i, masked_p,
                                   autodetected_vector_mode, fatal);
          if (fatal)
            break;
@@ -3783,6 +3796,9 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
                break;
            }
 
+         /* Revert back to the default from the suggested preferred
+            epilogue vectorization mode.  */
+         masked_p = -1;
          if (mode_i == vector_modes.length ())
            break;
        }
@@ -3793,13 +3809,14 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
 
       /* When we selected a first vectorized epilogue, see if the target
         suggests to have another one.  */
+      masked_p = -1;
       if (!unlimited_cost_model (loop)
          && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (orig_loop_vinfo)
-         && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
+         && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
              != VOIDmode))
        {
          vector_modes[0]
-           = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+           = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
          cached_vf_per_mode[0] = 0;
          mode_i = 0;
        }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 66a29648fb4..d41b73f610f 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1714,7 +1714,7 @@ public:
   unsigned int outside_cost () const;
   unsigned int total_cost () const;
   unsigned int suggested_unroll_factor () const;
-  machine_mode suggested_epilogue_mode () const;
+  machine_mode suggested_epilogue_mode (int &masked) const;
 
 protected:
   unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
@@ -1738,8 +1738,13 @@ protected:
   unsigned int m_suggested_unroll_factor;
 
   /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
-     determined at finish_cost.  */
+     determined at finish_cost.  m_masked_epilogue says whether the epilogue
+     should use masked vectorization, regardless of the --param
+     vect-partial-vector-usage default: 1 to use it, 0 to not use it and -1
+     to let the --param setting decide.  If the user explicitly specified
+     --param vect-partial-vector-usage then that takes precedence.  */
   machine_mode m_suggested_epilogue_mode;
+  int m_masked_epilogue;
 
   /* True if finish_cost has been called.  */
   bool m_finished;
@@ -1755,6 +1760,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar)
     m_costs (),
     m_suggested_unroll_factor(1),
     m_suggested_epilogue_mode(VOIDmode),
+    m_masked_epilogue (-1),
     m_finished (false)
 {
 }
@@ -1815,9 +1821,10 @@ vector_costs::suggested_unroll_factor () const
 /* Return the suggested epilogue mode.  */
 
 inline machine_mode
-vector_costs::suggested_epilogue_mode () const
+vector_costs::suggested_epilogue_mode (int &masked_p) const
 {
   gcc_checking_assert (m_finished);
+  masked_p = m_masked_epilogue;
   return m_suggested_epilogue_mode;
 }
 
-- 
2.43.0

Reply via email to