gcc/ChangeLog
2020-MM-DD  Kewen Lin  <[email protected]>

* doc/invoke.texi (vect-with-length-scope): Document new option.
* params.opt (vect-with-length-scope): New.
* tree-vect-loop-manip.c (vect_set_loop_lens_directly): New function.
(vect_set_loop_condition_len): Likewise.
(vect_set_loop_condition): Call vect_set_loop_condition_len for loop
with length.
(vect_gen_vector_loop_niters): Use VF as the step for loop with length.
(vect_do_peeling): Adjust for loop with length.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
can_with_length_p and fully_with_length_p.
(release_vec_loop_lens): New function.
(_loop_vec_info::~_loop_vec_info): Use it to free the loop lens.
(vect_verify_loop_lens): New function.
(vect_analyze_loop_costing): Adjust for loop fully with length.
(determine_peel_for_niter): Don't peel if loop fully with length.
(vect_analyze_loop_2): Save LOOP_VINFO_CAN_WITH_LENGTH_P around retries,
and free the length rgroups before retrying. Check loop-wide reasons for
disabling loops with length. Make the final decision about whether to
use vector access with length or not.
(vect_analyze_loop): Add handling for the epilogue of a loop that can
use vector access with length but does not.
(vect_estimate_min_profitable_iters): Adjust for loop with length.
(vectorizable_reduction): Disable loop with length.
(vectorizable_live_operation): Likewise.
(vect_record_loop_len): New function.
(vect_get_loop_len): Likewise.
(vect_transform_loop): Flag that the final loop iteration could be a
partial vector for a loop with length.
* tree-vect-stmts.c (check_load_store_with_len): New function.
(vectorizable_store): Handle vector loop with length.
(vectorizable_load): Likewise.
(vect_gen_len): New function.
* tree-vectorizer.h (struct rgroup_lens): New structure.
(vec_loop_lens): New typedef.
(_loop_vec_info): Add lens, can_with_length_p and fully_with_length_p.
(LOOP_VINFO_CAN_WITH_LENGTH_P): New macro.
(LOOP_VINFO_FULLY_WITH_LENGTH_P): Likewise.
(LOOP_VINFO_LENS): Likewise.
(vect_record_loop_len): New declaration.
(vect_get_loop_len): Likewise.
(vect_gen_len): Likewise.
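
For illustration only (a hand-written sketch, not actual compiler output;
names like vectp_b, _len and remaining_bytes are made up), a simple copy
loop such as

    for (int i = 0; i < n; i++)
      a[i] = b[i];

is expected to have its vectorized body use the new length-based internal
functions, with the length computed by vect_gen_len from the number of
scalar bytes left to process:

    _len = MIN (remaining_bytes, vector_size);
    vect__1 = .LEN_LOAD (vectp_b, align, _len);
    .LEN_STORE (vectp_a, align, _len, vect__1);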
---
gcc/doc/invoke.texi | 7 +
gcc/params.opt | 4 +
gcc/tree-vect-loop-manip.c | 268 ++++++++++++++++++++++++++++++++++++-
gcc/tree-vect-loop.c | 241 ++++++++++++++++++++++++++++++++-
gcc/tree-vect-stmts.c | 152 +++++++++++++++++++++
gcc/tree-vectorizer.h | 32 +++++
6 files changed, 697 insertions(+), 7 deletions(-)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8b9935dfe65..ac765feab13 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -13079,6 +13079,13 @@ by the copy loop headers pass.
@item vect-epilogues-nomask
Enable loop epilogue vectorization using smaller vector size.
+@item vect-with-length-scope
+Control the scope of vector memory access with length exploitation.  0 means
+we don't exploit any vector memory access with length, 1 means we only
+exploit vector memory access with length for loops whose number of iterations
+is less than VF, such as a very small loop or an epilogue, and 2 means we
+exploit vector memory access with length for any loop if possible.
+
@item slp-max-insns-in-bb
Maximum number of instructions in basic block to be
considered for SLP vectorization.
diff --git a/gcc/params.opt b/gcc/params.opt
index 4aec480798b..d4309101067 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -964,4 +964,8 @@ Bound on number of runtime checks inserted by the vectorizer's loop versioning f
 Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
 Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
+-param=vect-with-length-scope=
+Common Joined UInteger Var(param_vect_with_length_scope) Init(0) IntegerRange(0, 2) Param Optimization
+Control the vector with length exploitation scope.
+
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 8c5e696b995..3d5dec6f65c 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -747,6 +747,263 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
return cond_stmt;
}
+/* Helper for vect_set_loop_condition_len. Like vect_set_loop_masks_directly,
+ generate definitions for all the lengths in RGL and return a length that is
+ nonzero when the loop needs to iterate. Add any new preheader statements to
+ PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond.
+
+ RGL belongs to loop LOOP. The loop originally iterated NITERS
+ times and has been vectorized according to LOOP_VINFO. Each iteration
+ of the vectorized loop handles VF iterations of the scalar loop.
+
+   IV_LIMIT is the limit that the induction variable can reach, which is
+   used to check whether the induction variable can wrap before hitting
+   NITERS.  */
+
+static tree
+vect_set_loop_lens_directly (class loop *loop, loop_vec_info loop_vinfo,
+ gimple_seq *preheader_seq,
+ gimple_stmt_iterator loop_cond_gsi,
+			     rgroup_lens *rgl, tree niters,
+			     widest_int iv_limit)
+{
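+  /* Obtain the target-supported length type: an unsigned integer type
+     with the precision of the target's length mode.  */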
+ scalar_int_mode len_mode = targetm.vectorize.length_mode;
+ unsigned int len_prec = GET_MODE_PRECISION (len_mode);
+ tree len_type = build_nonstandard_integer_type (len_prec, true);
+
+ tree vec_type = rgl->vec_type;
+ unsigned int nbytes_per_iter = rgl->nbytes_per_iter;
+ poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vec_type));
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
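+  /* A full vector covers VECTOR_SIZE bytes; each generated length is
+     capped at this value.  */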
+ tree vec_size = build_int_cst (len_type, vector_size);
+
+ /* See whether zero-based IV would ever generate zero length before
+ wrapping around. */
+ bool might_wrap_p = (iv_limit == -1);
+ if (!might_wrap_p)
+ {
+ widest_int iv_limit_max = iv_limit * nbytes_per_iter;
+ might_wrap_p = wi::min_precision (iv_limit_max, UNSIGNED) > len_prec;
+ }
+
+  /* Calculate the maximum number of bytes of scalars that the rgroup
+     handles in total, and the number that it handles for each iteration
+     of the vector loop.  */
+ tree nbytes_total = niters;
+ tree nbytes_step = build_int_cst (len_type, vf);
+ if (nbytes_per_iter != 1)
+ {
+ tree factor = build_int_cst (len_type, nbytes_per_iter);
+ nbytes_total = gimple_build (preheader_seq, MULT_EXPR, len_type,
+ nbytes_total, factor);
+ nbytes_step = gimple_build (preheader_seq, MULT_EXPR, len_type,
+ nbytes_step, factor);
+ }
+
+  /* Create an induction variable that counts the processed bytes of
+     scalars.  */
+ tree index_before_incr, index_after_incr;
+ gimple_stmt_iterator incr_gsi;
+ bool insert_after;
+ standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+ create_iv (build_int_cst (len_type, 0), nbytes_step, NULL_TREE, loop,
+ &incr_gsi, insert_after, &index_before_incr, &index_after_incr);
+
+ tree zero_index = build_int_cst (len_type, 0);
+ tree test_index, test_limit, first_limit;
+ gimple_stmt_iterator *test_gsi;
+
+ /* For the first iteration it doesn't matter whether the IV hits
+ a value above NBYTES_TOTAL. That only matters for the latch
+ condition. */
+ first_limit = nbytes_total;
+
+ if (might_wrap_p)
+ {
+ test_index = index_before_incr;
+ tree adjust = gimple_convert (preheader_seq, len_type, nbytes_step);
+ test_limit = gimple_build (preheader_seq, MAX_EXPR, len_type,
+ nbytes_total, adjust);
+ test_limit = gimple_build (preheader_seq, MINUS_EXPR, len_type,
+ test_limit, adjust);
+ test_gsi = &incr_gsi;
+ }
+ else
+ {
+ /* Test the incremented IV, which will always hit a value above
+ the bound before wrapping. */
+ test_index = index_after_incr;
+ test_limit = nbytes_total;
+ test_gsi = &loop_cond_gsi;
+ }
+
+ /* Provide a definition of each length in the group. */
+ tree next_len = NULL_TREE;
+ tree len;
+ unsigned int i;
+ FOR_EACH_VEC_ELT_REVERSE (rgl->lens, i, len)
+ {
+      /* Previous lengths will cover the first BIAS bytes.  This length
+	 covers the next batch; each batch handles at most VEC_SIZE bytes.  */
+ poly_uint64 bias = vector_size * i;
+ tree bias_tree = build_int_cst (len_type, bias);
+
+ /* See whether the first iteration of the vector loop is known
+ to have a full vector size. */
+ poly_uint64 const_limit;
+ bool first_iteration_full
+ = (poly_int_tree_p (first_limit, &const_limit)
+ && known_ge (const_limit, (i + 1) * vector_size));
+
+ /* Rather than have a new IV that starts at BIAS and goes up to
+ TEST_LIMIT, prefer to use the same 0-based IV for each length
+ and adjust the bound down by BIAS. */
+ tree this_test_limit = test_limit;
+ if (i != 0)
+ {
+ this_test_limit = gimple_build (preheader_seq, MAX_EXPR, len_type,
+ this_test_limit, bias_tree);
+ this_test_limit = gimple_build (preheader_seq, MINUS_EXPR, len_type,
+ this_test_limit, bias_tree);
+ }
+
+ /* Create the initial length. First include all scalar bytes that
+ are within the loop limit. */
+ tree init_len = NULL_TREE;
+ if (!first_iteration_full)
+ {
+ tree start, end;
+ if (first_limit == test_limit)
+ {
+ /* Use a natural test between zero (the initial IV value)
+ and the loop limit. The "else" block would be valid too,
+ but this choice can avoid the need to load BIAS_TREE into
+ a register. */
+ start = zero_index;
+ end = this_test_limit;
+ }
+ else
+ {
+ /* FIRST_LIMIT is the maximum number of scalar bytes handled by
+ the first iteration of the vector loop. Test the portion
+ associated with this length. */
+ start = bias_tree;
+ end = first_limit;
+ }
+
+ init_len = make_temp_ssa_name (len_type, NULL, "max_len");
+ gimple_seq seq = vect_gen_len (init_len, start, end, vec_size);
+ gimple_seq_add_seq (preheader_seq, seq);
+ }
+
+ /* First iteration is full. */
+ if (!init_len)
+ init_len = vec_size;
+
+ /* Get the length value for the next iteration of the loop. */
+ next_len = make_temp_ssa_name (len_type, NULL, "next_len");
+ tree end = this_test_limit;
+ gimple_seq seq = vect_gen_len (next_len, test_index, end, vec_size);
+ gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
+
+      /* Reuse the mask routine to create the phi node for this length,
+	 with INIT_LEN from the preheader and NEXT_LEN from the latch.  */
+ vect_set_loop_mask (loop, len, init_len, next_len);
+ }
+
+ return next_len;
+}
+
+/* Like vect_set_loop_condition_masked, but handle the case of vector
+   access with length.  */
+
+static gcond *
+vect_set_loop_condition_len (class loop *loop, loop_vec_info loop_vinfo,
+ tree niters, tree final_iv,
+ bool niters_maybe_zero,
+ gimple_stmt_iterator loop_cond_gsi)
+{
+ gimple_seq preheader_seq = NULL;
+ gimple_seq header_seq = NULL;
+ tree orig_niters = niters;
+
+ /* Type of the initial value of NITERS. */
+ tree ni_actual_type = TREE_TYPE (niters);
+ unsigned int ni_actual_prec = TYPE_PRECISION (ni_actual_type);
+
+ /* Obtain target supported length type. */
+ scalar_int_mode len_mode = targetm.vectorize.length_mode;
+ unsigned int len_prec = GET_MODE_PRECISION (len_mode);
+ tree len_type = build_nonstandard_integer_type (len_prec, true);
+
+  /* Calculate the value that the induction variable must be able to hit
+     in order to ensure that we end the loop with a zero length.  */
+ widest_int iv_limit = -1;
+ unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo);
+ if (max_loop_iterations (loop, &iv_limit))
+ {
+ /* Round this value down to the previous vector alignment boundary and
+ then add an extra full iteration. */
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf;
+ }
+
+ /* Convert NITERS to the same size as the length. */
+ if (niters_maybe_zero || (len_prec > ni_actual_prec))
+ {
+ /* We know that there is always at least one iteration, so if the
+ count is zero then it must have wrapped. Cope with this by
+ subtracting 1 before the conversion and adding 1 to the result. */
+ gcc_assert (TYPE_UNSIGNED (ni_actual_type));
+ niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type, niters,
+ build_minus_one_cst (ni_actual_type));
+ niters = gimple_convert (&preheader_seq, len_type, niters);
+ niters = gimple_build (&preheader_seq, PLUS_EXPR, len_type, niters,
+ build_one_cst (len_type));
+ }
+ else
+ niters = gimple_convert (&preheader_seq, len_type, niters);
+
+ /* Iterate over all the rgroups and fill in their lengths. We could use
+ the first length from any rgroup for the loop condition; here we
+ arbitrarily pick the last. */
+ tree test_len = NULL_TREE;
+ rgroup_lens *rgl;
+ unsigned int i;
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+
+ FOR_EACH_VEC_ELT (*lens, i, rgl)
+ if (!rgl->lens.is_empty ())
+      /* Set up all the lengths for this rgroup.  */
+ test_len
+ = vect_set_loop_lens_directly (loop, loop_vinfo, &preheader_seq,
+ loop_cond_gsi, rgl, niters, iv_limit);
+
+ /* Emit all accumulated statements. */
+ add_preheader_seq (loop, preheader_seq);
+ add_header_seq (loop, header_seq);
+
+ /* Get a boolean result that tells us whether to iterate. */
+ edge exit_edge = single_exit (loop);
+ tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
+ tree zero_len = build_zero_cst (TREE_TYPE (test_len));
+ gcond *cond_stmt
+ = gimple_build_cond (code, test_len, zero_len, NULL_TREE, NULL_TREE);
+ gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
+
+ /* The loop iterates (NITERS - 1) / VF + 1 times.
+ Subtract one from this to get the latch count. */
+ tree step = build_int_cst (len_type, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+  tree niters_minus_one
+    = fold_build2 (PLUS_EXPR, len_type, niters,
+		   build_minus_one_cst (len_type));
+ loop->nb_iterations
+ = fold_build2 (TRUNC_DIV_EXPR, len_type, niters_minus_one, step);
+
+ if (final_iv)
+ {
+ gassign *assign = gimple_build_assign (final_iv, orig_niters);
+ gsi_insert_on_edge_immediate (single_exit (loop), assign);
+ }
+
+ return cond_stmt;
+}
+
/* Like vect_set_loop_condition, but handle the case in which there
are no loop masks. */
@@ -916,6 +1173,10 @@ vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
final_iv, niters_maybe_zero,
loop_cond_gsi);
+ else if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ cond_stmt = vect_set_loop_condition_len (loop, loop_vinfo, niters,
+ final_iv, niters_maybe_zero,
+ loop_cond_gsi);
else
cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
final_iv, niters_maybe_zero,
@@ -1939,7 +2200,8 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
unsigned HOST_WIDE_INT const_vf;
if (vf.is_constant (&const_vf)
- && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
{
/* Create: niters >> log2(vf) */
/* If it's known that niters == number of latch executions + 1 doesn't
@@ -2472,6 +2734,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
poly_uint64 bound_epilog = 0;
if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
&& LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
bound_epilog += vf - 1;
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
@@ -2567,7 +2830,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
if (vect_epilogues
&& LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& prolog_peeling >= 0
- && known_eq (vf, lowest_vf))
+ && known_eq (vf, lowest_vf)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (epilogue_vinfo))
{
unsigned HOST_WIDE_INT eiters
= (LOOP_VINFO_INT_NITERS (loop_vinfo)
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 80e33b61be7..d61f46becfd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -815,6 +815,8 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
vectorizable (false),
can_fully_mask_p (true),
fully_masked_p (false),
+ can_with_length_p (param_vect_with_length_scope != 0),
+ fully_with_length_p (false),
peeling_for_gaps (false),
peeling_for_niter (false),
no_data_dependencies (false),
@@ -887,6 +889,18 @@ release_vec_loop_masks (vec_loop_masks *masks)
masks->release ();
}
+/* Free all levels of LENS. */
+
+void
+release_vec_loop_lens (vec_loop_lens *lens)
+{
+ rgroup_lens *rgl;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (*lens, i, rgl)
+ rgl->lens.release ();
+ lens->release ();
+}
+
/* Free all memory used by the _loop_vec_info, as well as all the
stmt_vec_info structs of all the stmts in the loop. */
@@ -895,6 +909,7 @@ _loop_vec_info::~_loop_vec_info ()
free (bbs);
release_vec_loop_masks (&masks);
+ release_vec_loop_lens (&lens);
delete ivexpr_map;
delete scan_map;
epilogue_vinfos.release ();
@@ -1056,6 +1071,44 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
return true;
}
+/* Check whether we can use vector access with length based on precision
+   comparison.  So far, to keep it simple, we only allow the case in which
+   the precision of the target-supported length is not less than the
+   precision required by the loop niters.  */
+
+static bool
+vect_verify_loop_lens (loop_vec_info loop_vinfo)
+{
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+
+ if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
+ return false;
+
+ /* Get the maximum number of iterations that is representable
+ in the counter type. */
+ tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
+ widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
+
+ /* Get a more refined estimate for the number of iterations. */
+ widest_int max_back_edges;
+ if (max_loop_iterations (loop, &max_back_edges))
+ max_ni = wi::smin (max_ni, max_back_edges + 1);
+
+  /* Account for rgroup lengths: each scalar iteration contributes
+     NBYTES_PER_ITER bytes, which the length IV must be able to count.  */
+ rgroup_lens *rgl = &(*lens)[lens->length () - 1];
+ max_ni *= rgl->nbytes_per_iter;
+
+ /* Work out how many bits we need to represent the limit. */
+ unsigned int min_ni_width = wi::min_precision (max_ni, UNSIGNED);
+
+ unsigned len_bits = GET_MODE_PRECISION (targetm.vectorize.length_mode);
+ if (len_bits < min_ni_width)
+ return false;
+
+ return true;
+}
+
/* Calculate the cost of one scalar iteration of the loop. */
static void
vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
@@ -1630,7 +1683,8 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
/* Only fully-masked loops can have iteration counts less than the
vectorization factor. */
- if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
{
if (known_niters_smaller_than_vf (loop_vinfo))
{
@@ -1858,7 +1912,8 @@ determine_peel_for_niter (loop_vec_info loop_vinfo)
th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
(loop_vinfo));
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
/* The main loop handles all iterations. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
@@ -2048,6 +2103,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
}
bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);
+  bool saved_can_with_length_p = LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo);
/* We don't expect to have to roll back to anything other than an empty
set of rgroups. */
@@ -2144,6 +2200,71 @@ start_over:
"not using a fully-masked loop.\n");
}
+ /* Decide whether we can use vector access with length. */
+
+ if ((LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use vector access with length becuase peeling"
+ " for alignment or gaps is required.\n");
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+ }
+
+ if (LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo)
+ && !vect_verify_loop_lens (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use vector access with length becuase the"
+ " length precision verification fail.\n");
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+ }
+
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use vector access with length becuase the"
+ " loop will be fully-masked.\n");
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+ }
+
+ if (LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+ {
+      /* As a special case, a loop whose max niters is less than VF can
+	 simply operate on its whole body with length.  */
+ if (param_vect_with_length_scope == 1)
+ {
+	  /* This is an epilogue, whose iteration count is less than VF.  */
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = true;
+	  /* Otherwise, ensure the iteration count is known to be less
+	     than VF.  */
+ else if (known_niters_smaller_than_vf (loop_vinfo))
+ LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = true;
+ }
+ else
+ {
+ gcc_assert (param_vect_with_length_scope == 2);
+ LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = true;
+ }
+ }
+ else
+    /* Always set it to false in case a previous attempt set it.  */
+ LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = false;
+
+ if (dump_enabled_p ())
+ {
+      if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+	dump_printf_loc (MSG_NOTE, vect_location, "using vector access with"
+			 " length for the loop fully.\n");
+      else
+	dump_printf_loc (MSG_NOTE, vect_location, "not using vector access"
+			 " with length for the loop fully.\n");
+ }
+
/* If epilog loop is required because of data accesses with gaps,
one additional iteration needs to be peeled. Check if there is
enough iterations for vectorization. */
@@ -2164,6 +2285,7 @@ start_over:
loop or a loop that has a lower VF than the main loop. */
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
&& !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
&& maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
return opt_result::failure_at (vect_location,
@@ -2362,12 +2484,14 @@ again:
= init_cost (LOOP_VINFO_LOOP (loop_vinfo));
/* Reset accumulated rgroup information. */
release_vec_loop_masks (&LOOP_VINFO_MASKS (loop_vinfo));
+ release_vec_loop_lens (&LOOP_VINFO_LENS (loop_vinfo));
/* Reset assorted flags. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = saved_can_with_length_p;
goto start_over;
}
@@ -2646,8 +2770,10 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
if (ordered_p (lowest_th, th))
lowest_th = ordered_min (lowest_th, th);
}
- else
- delete loop_vinfo;
+      else
+	{
+	  delete loop_vinfo;
+	  loop_vinfo = opt_loop_vec_info::success (NULL);
+	}
/* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
enabled, SIMDUID is not set, it is the innermost loop and we have
@@ -2672,6 +2798,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
else
{
delete loop_vinfo;
+ loop_vinfo = opt_loop_vec_info::success (NULL);
if (fatal)
{
gcc_checking_assert (first_loop_vinfo == NULL);
@@ -2679,6 +2806,21 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
}
}
+  /* If the original loop could have used vector access with length but
+     didn't, and vect_epilogues is still true here, retry the analysis
+     of the epilogue with the same vector mode, this time exploiting
+     length.  */
+ if (vect_epilogues && loop_vinfo
+ && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+ {
+ gcc_assert (!LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo));
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Re-trying analysis with same vector"
+ " mode %s for epilogue with length.\n",
+ GET_MODE_NAME (loop_vinfo->vector_mode));
+ continue;
+ }
+
if (mode_i < vector_modes.length ()
&& VECTOR_MODE_P (autodetected_vector_mode)
&& (related_vector_mode (vector_modes[mode_i],
@@ -3519,6 +3661,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
target_cost_data, num_masks - 1, vector_stmt,
NULL, NULL_TREE, 0, vect_body);
}
+ else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ {
+ peel_iters_prologue = 0;
+ peel_iters_epilogue = 0;
+ }
else if (npeel < 0)
{
peel_iters_prologue = assumed_vf / 2;
@@ -3809,6 +3956,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
min_profitable_iters);
if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
&& min_profitable_iters < (assumed_vf + peel_iters_prologue))
/* We want the vectorized loop to execute at least once. */
min_profitable_iters = assumed_vf + peel_iters_prologue;
@@ -6761,6 +6909,16 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"using an in-order (fold-left) reduction.\n");
STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type;
+
+ if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use vector access with length due to"
+ " reduction operation.\n");
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+ }
+
/* All but single defuse-cycle optimized, lane-reducing and fold-left
reductions go through their own vectorizable_* routines. */
if (!single_defuse_cycle
@@ -8041,6 +8199,16 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
1, vectype, NULL);
}
}
+
+ if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+ {
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use vector access with length due to"
+ " live operation.\n");
+ }
+
return true;
}
@@ -8354,6 +8522,66 @@ vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
return mask;
}
+/* Record that LOOP_VINFO would need LENS to contain a sequence of NVECTORS
+   lengths for vector access with length, each controlling a vector of type
+   VECTYPE.  */
+
+void
+vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
+ unsigned int nvectors, tree vectype)
+{
+ gcc_assert (nvectors != 0);
+ if (lens->length () < nvectors)
+ lens->safe_grow_cleared (nvectors);
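+  /* As with rgroup masks, the lengths for rgroups with N vectors are
+     kept at index N - 1.  */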
+ rgroup_lens *rgl = &(*lens)[nvectors - 1];
+
+  /* The number of scalars per iteration, their total size in bytes and
+     the number of vectors are all compile-time constants.  */
+ poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned int nbytes_per_iter
+ = exact_div (nvectors * vector_size, vf).to_constant ();
+
+  /* All lengths associated with the same NVECTORS must have the same
+     number of bytes per iteration.  */
+ if (!rgl->vec_type)
+ {
+ rgl->vec_type = vectype;
+ rgl->nbytes_per_iter = nbytes_per_iter;
+ }
+ else
+ gcc_assert (rgl->nbytes_per_iter == nbytes_per_iter);
+}
+
+/* Given a complete set of length LENS, extract length number INDEX for an
+ rgroup that operates on NVECTORS vectors, where 0 <= INDEX < NVECTORS. */
+
+tree
+vect_get_loop_len (vec_loop_lens *lens, unsigned int nvectors,
+		   unsigned int index)
+{
+ rgroup_lens *rgl = &(*lens)[nvectors - 1];
+
+ /* Populate the rgroup's len array, if this is the first time we've
+ used it. */
+ if (rgl->lens.is_empty ())
+ {
+ rgl->lens.safe_grow_cleared (nvectors);
+ for (unsigned int i = 0; i < nvectors; ++i)
+ {
+ scalar_int_mode len_mode = targetm.vectorize.length_mode;
+ unsigned int len_prec = GET_MODE_PRECISION (len_mode);
+ tree len_type = build_nonstandard_integer_type (len_prec, true);
+ tree len = make_temp_ssa_name (len_type, NULL, "loop_len");
+
+ /* Provide a dummy definition until the real one is available. */
+ SSA_NAME_DEF_STMT (len) = gimple_build_nop ();
+ rgl->lens[i] = len;
+ }
+ }
+
+ return rgl->lens[index];
+}
+
/* Scale profiling counters by estimation for LOOP which is vectorized
by factor VF. */
@@ -8714,6 +8942,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
&& known_eq (lowest_vf, vf))
{
niters_vector
@@ -8881,7 +9110,9 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
/* True if the final iteration might not handle a full vector's
worth of scalar iterations. */
- bool final_iter_may_be_partial = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ bool final_iter_may_be_partial
+ = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
/* The minimum number of iterations performed by the epilogue. This
is 1 when peeling for gaps because we always need a final scalar
iteration. */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index e7822c44951..d6be39e1831 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1879,6 +1879,66 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
gcc_unreachable ();
}
+/* Check whether a load or store statement in the loop described by
+   LOOP_VINFO can go with length.  This tests whether the vectorizer
+   pass has the appropriate support, as well as whether the target
+   does.
+
+ VLS_TYPE says whether the statement is a load or store and VECTYPE
+ is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
+ says how the load or store is going to be implemented and GROUP_SIZE
+ is the number of load or store statements in the containing group.
+
+ Clear LOOP_VINFO_CAN_WITH_LENGTH_P if it can't go with length, otherwise
+ record the required length types. */
+
+static void
+check_load_store_with_len (loop_vec_info loop_vinfo, tree vectype,
+ vec_load_store_type vls_type, int group_size,
+ vect_memory_access_type memory_access_type)
+{
+ /* Invariant loads need no special support. */
+ if (memory_access_type == VMAT_INVARIANT)
+ return;
+
+ if (memory_access_type != VMAT_CONTIGUOUS
+ && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use vector access with length"
+ " because an access isn't contiguous.\n");
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+ return;
+ }
+
+ machine_mode vecmode = TYPE_MODE (vectype);
+ bool is_load = (vls_type == VLS_LOAD);
+ optab op = is_load ? lenload_optab : lenstore_optab;
+
+ if (!VECTOR_MODE_P (vecmode)
+ || !convert_optab_handler (op, vecmode, targetm.vectorize.length_mode))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use vector access with length because"
+ " the target doesn't have the appropriate"
+ " load or store with length.\n");
+ LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+ return;
+ }
+
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned int nvectors;
+
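+  /* Work out how many vectors (and hence lengths) this access needs for
+     each iteration of the vector loop.  */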
+ if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
+ vect_record_loop_len (loop_vinfo, lens, nvectors, vectype);
+ else
+ gcc_unreachable ();
+}
+
/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
that needs to be applied to all loads and stores in a vectorized loop.
@@ -7532,6 +7592,10 @@ vectorizable_store (vec_info *vinfo,
check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
memory_access_type, &gs_info, mask);
+ if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+ check_load_store_with_len (loop_vinfo, vectype, vls_type, group_size,
+ memory_access_type);
+
if (slp_node
&& !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
vectype))
@@ -8068,6 +8132,15 @@ vectorizable_store (vec_info *vinfo,
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
? &LOOP_VINFO_MASKS (loop_vinfo)
: NULL);
+
+ vec_loop_lens *loop_lens
+ = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
+ ? &LOOP_VINFO_LENS (loop_vinfo)
+ : NULL);
+
+ /* Shouldn't go with length if fully masked. */
+  gcc_assert (!loop_lens || !loop_masks);
+
/* Targets with store-lane instructions must not require explicit
realignment. vect_supportable_dr_alignment always returns either
dr_aligned or dr_unaligned_supported for masked operations. */
@@ -8320,10 +8393,15 @@ vectorizable_store (vec_info *vinfo,
unsigned HOST_WIDE_INT align;
tree final_mask = NULL_TREE;
+ tree final_len = NULL_TREE;
if (loop_masks)
final_mask = vect_get_loop_mask (gsi, loop_masks,
vec_num * ncopies,
vectype, vec_num * j + i);
+ else if (loop_lens)
+ final_len = vect_get_loop_len (loop_lens, vec_num * ncopies,
+ vec_num * j + i);
+
if (vec_mask)
final_mask = prepare_load_store_mask (mask_vectype, final_mask,
vec_mask, gsi);
@@ -8403,6 +8481,17 @@ vectorizable_store (vec_info *vinfo,
new_stmt_info
= vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
}
+ else if (final_len)
+ {
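+	      /* As with masked accesses, the PTR operand encodes the
+		 alignment of the reference.  */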
+ align = least_bit_hwi (misalign | align);
+ tree ptr = build_int_cst (ref_type, align);
+ gcall *call
+ = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
+ ptr, final_len, vec_oprnd);
+ gimple_call_set_nothrow (call, true);
+ new_stmt_info
+ = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+ }
else
{
data_ref = fold_build2 (MEM_REF, vectype,
@@ -8839,6 +8928,10 @@ vectorizable_load (vec_info *vinfo,
check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
memory_access_type, &gs_info, mask);
+ if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+ check_load_store_with_len (loop_vinfo, vectype, VLS_LOAD, group_size,
+ memory_access_type);
+
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
slp_node, cost_vec);
@@ -8937,6 +9030,7 @@ vectorizable_load (vec_info *vinfo,
gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
gcc_assert (!nested_in_vect_loop);
+ gcc_assert (!LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo));
if (grouped_load)
{
@@ -9234,6 +9328,15 @@ vectorizable_load (vec_info *vinfo,
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
? &LOOP_VINFO_MASKS (loop_vinfo)
: NULL);
+
+ vec_loop_lens *loop_lens
+ = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
+ ? &LOOP_VINFO_LENS (loop_vinfo)
+ : NULL);
+
+ /* Shouldn't go with length if fully masked. */
+  gcc_assert (!loop_lens || !loop_masks);
+
/* Targets with store-lane instructions must not require explicit
realignment. vect_supportable_dr_alignment always returns either
dr_aligned or dr_unaligned_supported for masked operations. */
@@ -9555,15 +9658,20 @@ vectorizable_load (vec_info *vinfo,
for (i = 0; i < vec_num; i++)
{
tree final_mask = NULL_TREE;
+ tree final_len = NULL_TREE;
if (loop_masks
&& memory_access_type != VMAT_INVARIANT)
final_mask = vect_get_loop_mask (gsi, loop_masks,
vec_num * ncopies,
vectype, vec_num * j + i);
+ else if (loop_lens && memory_access_type != VMAT_INVARIANT)
+ final_len = vect_get_loop_len (loop_lens, vec_num * ncopies,
+ vec_num * j + i);
if (vec_mask)
final_mask = prepare_load_store_mask (mask_vectype, final_mask,
vec_mask, gsi);
+
if (i > 0)
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
gsi, stmt_info, bump);
@@ -9629,6 +9737,18 @@ vectorizable_load (vec_info *vinfo,
new_stmt = call;
data_ref = NULL_TREE;
}
+ else if (final_len)
+ {
+ align = least_bit_hwi (misalign | align);
+ tree ptr = build_int_cst (ref_type, align);
+ gcall *call
+ = gimple_build_call_internal (IFN_LEN_LOAD, 3,
+ dataref_ptr, ptr,
+ final_len);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+ }
else
{
tree ltype = vectype;
@@ -12480,3 +12600,35 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
*nunits_vectype_out = nunits_vectype;
return opt_result::success ();
}
+
+/* Generate and return a statement sequence that sets vector length LEN to:
+
+     min_of_start_and_end = min (START_INDEX, END_INDEX);
+     left_bytes = END_INDEX - min_of_start_and_end;
+     rhs = min (left_bytes, VECTOR_SIZE);
+     LEN = rhs;
+
+   For example, with START_INDEX 12, END_INDEX 20 and VECTOR_SIZE 16,
+   LEN is min (20 - 12, 16) = 8.
+
+   TODO: For now, the rs6000 vector-with-length support only cares about
+   8 bits of the length, which means a LEFT_BYTES larger than 255 can't
+   be saturated to the vector size.  A target hook can be provided if
+   other ports don't have this restriction.  */
+
+gimple_seq
+vect_gen_len (tree len, tree start_index, tree end_index, tree vector_size)
+{
+ gimple_seq stmts = NULL;
+ tree len_type = TREE_TYPE (len);
+ gcc_assert (TREE_TYPE (start_index) == len_type);
+
+ tree min = fold_build2 (MIN_EXPR, len_type, start_index, end_index);
+ tree left_bytes = fold_build2 (MINUS_EXPR, len_type, end_index, min);
+ left_bytes = fold_build2 (MIN_EXPR, len_type, left_bytes, vector_size);
+
+ tree rhs = force_gimple_operand (left_bytes, &stmts, true, NULL_TREE);
+ gimple *new_stmt = gimple_build_assign (len, rhs);
+ gimple_stmt_iterator i = gsi_last (stmts);
+ gsi_insert_after_without_update (&i, new_stmt, GSI_CONTINUE_LINKING);
+
+ return stmts;
+}
+
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 2eb3ab5d280..774d5025639 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -476,6 +476,21 @@ struct rgroup_masks {
typedef auto_vec<rgroup_masks> vec_loop_masks;
+/* Similar to masks above, the lengths needed by rgroups with nV vectors. */
+struct rgroup_lens
+{
+  /* The total number of scalar bytes handled per iteration.  */
+ unsigned int nbytes_per_iter;
+
+  /* A representative vector type for these lengths.  */
+ tree vec_type;
+
+ /* A vector of nV lengths, in iteration order. */
+ vec<tree> lens;
+};
+
+typedef auto_vec<rgroup_lens> vec_loop_lens;
+
typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
/*-----------------------------------------------------------------*/
@@ -523,6 +538,10 @@ public:
on inactive scalars. */
vec_loop_masks masks;
+ /* The lengths that a loop with length should use to avoid operating
+ on inactive scalars. */
+ vec_loop_lens lens;
+
/* Set of scalar conditions that have loop mask applied. */
scalar_cond_masked_set_type scalar_cond_masked_set;
@@ -626,6 +645,12 @@ public:
/* True if have decided to use a fully-masked loop. */
bool fully_masked_p;
+  /* Records whether we still have the option of using a length access
+     loop.  */
+ bool can_with_length_p;
+
+ /* True if have decided to use length access for the loop fully. */
+ bool fully_with_length_p;
+
/* When we have grouped data accesses with gaps, we may introduce invalid
memory accesses. We peel the last iteration of the loop to prevent
this. */
@@ -689,6 +714,9 @@ public:
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
#define LOOP_VINFO_CAN_FULLY_MASK_P(L) (L)->can_fully_mask_p
#define LOOP_VINFO_FULLY_MASKED_P(L) (L)->fully_masked_p
+#define LOOP_VINFO_CAN_WITH_LENGTH_P(L) (L)->can_with_length_p
+#define LOOP_VINFO_FULLY_WITH_LENGTH_P(L) (L)->fully_with_length_p
+#define LOOP_VINFO_LENS(L) (L)->lens
#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
#define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
#define LOOP_VINFO_MASKS(L) (L)->masks
@@ -1842,6 +1870,10 @@ extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
unsigned int, tree, tree);
extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
unsigned int, tree, unsigned int);
+extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
+ tree);
+extern tree vect_get_loop_len (vec_loop_lens *, unsigned int, unsigned int);
+extern gimple_seq vect_gen_len (tree, tree, tree, tree);
extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
/* Drive for loop transformation stage. */
--