The following fixes the computation of supports_partial_vectors, which
is used to prune the set of modes to iterate over for epilogue
vectorization. The partial_vectors_supported_p predicate used so far
only looks for while_ult, while we also support predication when
mask modes are integer modes, as for AVX512.
I've noticed the pruning isn't very effective on x86_64 anyway, since
if the main loop mode is autodetected we skip re-analyzing
mode_i == 0, but then mode_i == 1 is usually the very same
large mode. This is fixed by the next patch.
The following simplifies the logic by re-using the
already computed LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P from
the main loop to decide whether we can possibly use partial
vectors for the epilogue (for the case of having the same VF).
We remember the main loop analysis result from before a suggested
unroll factor is applied, to avoid possible differences caused by
unrolling.
Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.
* tree-vect-loop.cc (vect_analyze_loop_1): New output parameter
recording whether the non-unrolled loop can use partial
vectors.
(vect_analyze_loop): Use the main loop partial vector
analysis result to decide if epilogues with the same VF
can use partial vectors.
---
gcc/tree-vect-loop.cc | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index c824b5abaaf..fa022dfad42 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3474,7 +3474,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared
*shared,
loop_vec_info orig_loop_vinfo,
const vector_modes &vector_modes, unsigned &mode_i,
machine_mode &autodetected_vector_mode,
- bool &fatal)
+ bool &fatal,
+ bool &loop_as_epilogue_supports_partial_vectors)
{
loop_vec_info loop_vinfo
= vect_create_loop_vinfo (loop, shared, loop_form_info, orig_loop_vinfo);
@@ -3488,6 +3489,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared
*shared,
opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
&suggested_unroll_factor,
slp_done_for_suggested_uf);
+ loop_as_epilogue_supports_partial_vectors
+ = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"***** Analysis %s with vector mode %s\n",
@@ -3633,6 +3636,8 @@ vect_analyze_loop (class loop *loop, gimple
*loop_vectorized_call,
for (unsigned i = 0; i < vector_modes.length (); ++i)
cached_vf_per_mode.safe_push (0);
+ bool supports_partial_vectors = false;
+
/* First determine the main loop vectorization mode, either the first
one that works, starting with auto-detecting the vector mode and then
following the targets order of preference, or the one with the
@@ -3644,10 +3649,12 @@ vect_analyze_loop (class loop *loop, gimple
*loop_vectorized_call,
/* Set cached VF to -1 prior to analysis, which indicates a mode has
failed. */
cached_vf_per_mode[last_mode_i] = -1;
+ bool loop_as_epilogue_supports_partial_vectors;
opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
NULL, vector_modes, mode_i,
- autodetected_vector_mode, fatal);
+ autodetected_vector_mode, fatal,
+ loop_as_epilogue_supports_partial_vectors);
if (fatal)
break;
@@ -3677,7 +3684,11 @@ vect_analyze_loop (class loop *loop, gimple
*loop_vectorized_call,
first_loop_vinfo = opt_loop_vec_info::success (NULL);
}
if (first_loop_vinfo == NULL)
- first_loop_vinfo = loop_vinfo;
+ {
+ first_loop_vinfo = loop_vinfo;
+ supports_partial_vectors
+ = loop_as_epilogue_supports_partial_vectors;
+ }
else
{
delete loop_vinfo;
@@ -3742,8 +3753,7 @@ vect_analyze_loop (class loop *loop, gimple
*loop_vectorized_call,
vector_modes[0] = autodetected_vector_mode;
mode_i = 0;
- bool supports_partial_vectors =
- partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
+ supports_partial_vectors &= param_vect_partial_vector_usage != 0;
poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
loop_vec_info orig_loop_vinfo = first_loop_vinfo;
@@ -3769,12 +3779,13 @@ vect_analyze_loop (class loop *loop, gimple
*loop_vectorized_call,
"***** Re-trying epilogue analysis with vector "
"mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
- bool fatal;
+ bool fatal, loop_as_epilogue_supports_partial_vectors;
opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
orig_loop_vinfo,
vector_modes, mode_i,
- autodetected_vector_mode, fatal);
+ autodetected_vector_mode, fatal,
+ loop_as_epilogue_supports_partial_vectors);
if (fatal)
break;
--
2.43.0