The following removes the redundant SLP_TREE_NUMBER_OF_VEC_STMTS,
replacing all uses with vect_get_num_copies. Earlier changes already
ensured that every setter of SLP_TREE_NUMBER_OF_VEC_STMTS computed
exactly that value, so the field only cached what vect_get_num_copies
computes.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
* tree-vectorizer.h (_slp_tree::vec_stmts_size): Remove.
(SLP_TREE_NUMBER_OF_VEC_STMTS): Likewise.
* tree-vect-loop.cc (vectorizable_reduction): Adjust.
(vect_transform_cycle_phi): Likewise.
(vectorizable_phi): Likewise.
(vectorizable_recurr): Likewise.
(vectorizable_induction): Likewise.
(vectorizable_live_operation): Likewise.
* tree-vect-slp.cc (_slp_tree::_slp_tree): Do not set
SLP_TREE_NUMBER_OF_VEC_STMTS.
(vect_slp_analyze_node_operations_1): Likewise.
(vect_slp_analyze_node_operations): Likewise.
(vect_prologue_cost_for_slp): Adjust.
(vect_create_constant_vectors): Likewise.
(vect_get_slp_defs): Likewise.
(vect_transform_slp_perm_load_1): Likewise.
(vectorizable_slp_permutation_1): Likewise.
(vect_schedule_slp_node): Likewise.
(vectorize_slp_instance_root_stmt): Likewise.
* tree-vect-stmts.cc (vect_model_simple_cost): Likewise.
(vectorizable_bswap): Likewise.
(vectorizable_call): Likewise.
(vectorizable_conversion): Likewise.
(vectorizable_shift): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_store): Likewise.
(vectorizable_load): Likewise.
(vectorizable_condition): Likewise.
(vectorizable_early_exit): Likewise.
---
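Note (not for the commit log): all replaced uses rely on
vect_get_num_copies yielding the value the removed field used to
cache. A rough sketch of what that helper computes follows; this is
from memory, the authoritative definition lives in tree-vectorizer.h
and may differ in detail:

  /* Sketch: number of vector defs NODE produces.  The loop
     vectorization factor (1 for basic-block vectorization) times the
     node's scalar lanes, divided by the lanes per vector of the
     node's vectype.  */
  inline unsigned int
  vect_get_num_copies (vec_info *vinfo, slp_tree node)
  {
    poly_uint64 vf = 1;
    if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    return vect_get_num_vectors (vf * SLP_TREE_LANES (node),
                                 SLP_TREE_VECTYPE (node));
  }

As the removed comment in vect_slp_analyze_node_operations_1 notes,
for single-defuse-cycle and lane-reducing reductions this count can
exceed the number of vector stmts actually emitted, which is why
vect_schedule_slp_node uses it as the created capacity of
SLP_TREE_VEC_DEFS rather than as an exact length.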
gcc/tree-vect-loop.cc | 19 +++++------
gcc/tree-vect-slp.cc | 51 ++++++++++------------------
gcc/tree-vect-stmts.cc | 76 ++++++++++++++++++++----------------------
gcc/tree-vectorizer.h | 6 ----
4 files changed, 63 insertions(+), 89 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 015a13734dd..0b5f484810c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7380,7 +7380,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (STMT_VINFO_LIVE_P (phi_info))
return false;
- ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ ncopies = vect_get_num_copies (loop_vinfo, slp_node);
gcc_assert (ncopies >= 1);
@@ -8253,7 +8253,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
/* Leave the scalar phi in place. */
return true;
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ vec_num = vect_get_num_copies (loop_vinfo, slp_node);
/* Check whether we should use a single PHI node and accumulate
vectors to one before the backedge. */
@@ -8508,7 +8508,7 @@ vect_transform_lc_phi (loop_vec_info loop_vinfo,
/* Vectorizes PHIs. */
bool
-vectorizable_phi (bb_vec_info,
+vectorizable_phi (bb_vec_info vinfo,
stmt_vec_info stmt_info,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
@@ -8559,7 +8559,7 @@ vectorizable_phi (bb_vec_info,
for the scalar and the vector PHIs. This avoids artificially
favoring the vector path (but may pessimize it in some cases). */
if (gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) > 1)
- record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ record_stmt_cost (cost_vec, vect_get_num_copies (vinfo, slp_node),
vector_stmt, slp_node, vectype, 0, vect_body);
SLP_TREE_TYPE (slp_node) = phi_info_type;
return true;
@@ -8663,7 +8663,7 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
return false;
tree vectype = SLP_TREE_VECTYPE (slp_node);
- unsigned ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ unsigned ncopies = vect_get_num_copies (loop_vinfo, slp_node);
poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
unsigned dist = SLP_TREE_LANES (slp_node);
/* We need to be able to make progress with a single vector. */
@@ -9572,6 +9572,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
}
}
+ unsigned nvects = vect_get_num_copies (loop_vinfo, slp_node);
if (cost_vec) /* transformation not required. */
{
unsigned inside_cost = 0, prologue_cost = 0;
@@ -9590,8 +9591,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return false;
}
/* loop cost for vec_loop. */
- inside_cost = record_stmt_cost (cost_vec,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ inside_cost = record_stmt_cost (cost_vec, nvects,
vector_stmt, slp_node, 0, vect_body);
/* prologue cost for vec_init (if not nested) and step. */
prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
@@ -9651,7 +9651,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
}
/* Now generate the IVs. */
- unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
gcc_assert (multiple_p (nunits * nvects, group_size));
unsigned nivs;
unsigned HOST_WIDE_INT const_nunits;
@@ -10201,7 +10200,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
all the slp vectors. Calculate which slp vector it is and the index
within. */
int num_scalar = SLP_TREE_LANES (slp_node);
- int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int num_vec = vect_get_num_copies (vinfo, slp_node);
poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;
/* Calculate which vector contains the result, and which lane of
@@ -10229,7 +10228,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
"the loop.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- else if (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
+ else if (num_vec > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 895fb88ab7f..f553e8fba19 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -112,7 +112,6 @@ _slp_tree::_slp_tree ()
SLP_TREE_SCALAR_STMTS (this) = vNULL;
SLP_TREE_SCALAR_OPS (this) = vNULL;
SLP_TREE_VEC_DEFS (this) = vNULL;
- SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
SLP_TREE_CHILDREN (this) = vNULL;
SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
SLP_TREE_LANE_PERMUTATION (this) = vNULL;
@@ -8042,17 +8041,6 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
slp_instance node_instance,
stmt_vector_for_cost *cost_vec)
{
- /* Calculate the number of vector statements to be created for the scalar
- stmts in this node. It is the number of scalar elements in one scalar
- iteration (DR_GROUP_SIZE) multiplied by VF divided by the number of
- elements in a vector. For single-defuse-cycle, lane-reducing op, and
- PHI statement that starts reduction comprised of only lane-reducing ops,
- the number is more than effective vector statements actually required. */
- if (SLP_TREE_VECTYPE (node))
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
- else
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
-
/* Handle purely internal nodes. */
if (SLP_TREE_PERMUTE_P (node))
{
@@ -8220,7 +8208,7 @@ vect_scalar_ops_slice_hash::equal (const value_type &s1,
by NODE. */
static void
-vect_prologue_cost_for_slp (slp_tree node,
+vect_prologue_cost_for_slp (vec_info *vinfo, slp_tree node,
stmt_vector_for_cost *cost_vec)
{
/* There's a special case of an existing vector, that costs nothing. */
@@ -8234,14 +8222,15 @@ vect_prologue_cost_for_slp (slp_tree node,
unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
unsigned HOST_WIDE_INT const_nunits;
unsigned nelt_limit;
+ unsigned nvectors = vect_get_num_copies (vinfo, node);
auto ops = &SLP_TREE_SCALAR_OPS (node);
- auto_vec<unsigned int> starts (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+ auto_vec<unsigned int> starts (nvectors);
if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
&& ! multiple_p (const_nunits, group_size))
{
nelt_limit = const_nunits;
hash_set<vect_scalar_ops_slice_hash> vector_ops;
- for (unsigned int i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); ++i)
+ for (unsigned int i = 0; i < nvectors; ++i)
if (!vector_ops.add ({ ops, i * nelt_limit, nelt_limit }))
starts.quick_push (i * nelt_limit);
}
@@ -8395,10 +8384,8 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
continue;
}
- SLP_TREE_NUMBER_OF_VEC_STMTS (child)
- = vect_get_num_copies (vinfo, child);
/* And cost them. */
- vect_prologue_cost_for_slp (child, cost_vec);
+ vect_prologue_cost_for_slp (vinfo, child, cost_vec);
}
/* If this node or any of its children can't be vectorized, try pruning
@@ -10337,7 +10324,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
/* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
vector_type = SLP_TREE_VECTYPE (op_node);
- unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node);
+ unsigned int number_of_vectors = vect_get_num_copies (vinfo, op_node);
SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
auto_vec<tree> voprnds (number_of_vectors);
@@ -10562,7 +10549,7 @@ vect_get_slp_vect_def (slp_tree slp_node, unsigned i)
void
vect_get_slp_defs (slp_tree slp_node, vec<tree> *vec_defs)
{
- vec_defs->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ vec_defs->create (SLP_TREE_VEC_DEFS (slp_node).length ());
vec_defs->splice (SLP_TREE_VEC_DEFS (slp_node));
}
@@ -10616,7 +10603,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
mode = TYPE_MODE (vectype);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ unsigned int nstmts = vect_get_num_copies (vinfo, node);
/* Initialize the vect stmts of NODE to properly insert the generated
stmts later. */
@@ -10816,7 +10803,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
if (n_loads)
{
if (repeating_p)
- *n_loads = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ *n_loads = nstmts;
else
{
/* Enforced above when !repeating_p. */
@@ -11065,7 +11052,8 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
unsigned vec_idx = (SLP_TREE_LANE_PERMUTATION (node)[0].second
/ SLP_TREE_LANES (node));
unsigned vec_num = SLP_TREE_LANES (child) / SLP_TREE_LANES (node);
- for (unsigned i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); ++i)
+ unsigned nvectors = vect_get_num_copies (vinfo, node);
+ for (unsigned i = 0; i < nvectors; ++i)
{
tree def = SLP_TREE_VEC_DEFS (child)[i * vec_num + vec_idx];
node->push_vec_def (def);
@@ -11406,14 +11394,11 @@ vect_schedule_slp_node (vec_info *vinfo,
return;
}
- gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ());
-
stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
- gcc_assert (!SLP_TREE_VECTYPE (node)
- || SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
- if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0)
- SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+ gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ());
+ if (SLP_TREE_VECTYPE (node))
+ SLP_TREE_VEC_DEFS (node).create (vect_get_num_copies (vinfo, node));
if (!SLP_TREE_PERMUTE_P (node) && STMT_VINFO_DATA_REF (stmt_info))
{
@@ -11675,7 +11660,7 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i
if (instance->kind == slp_inst_kind_ctor)
{
- if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
+ if (SLP_TREE_VEC_DEFS (node).length () == 1)
{
tree vect_lhs = SLP_TREE_VEC_DEFS (node)[0];
tree root_lhs = gimple_get_lhs (instance->root_stmts[0]->stmt);
@@ -11685,13 +11670,13 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i
vect_lhs);
rstmt = gimple_build_assign (root_lhs, vect_lhs);
}
- else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
+ else
{
- int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ gcc_assert (SLP_TREE_VEC_DEFS (node).length () > 1);
tree child_def;
int j;
vec<constructor_elt, va_gc> *v;
- vec_alloc (v, nelts);
+ vec_alloc (v, SLP_TREE_VEC_DEFS (node).length ());
/* A CTOR can handle V16HI composition from VNx8HI so we
do not need to convert vector elements if the types
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 6274956e2a5..07291dfea41 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -916,7 +916,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
of the same KIND. */
static void
-vect_model_simple_cost (vec_info *, int n, slp_tree node,
+vect_model_simple_cost (vec_info *vinfo, int n, slp_tree node,
stmt_vector_for_cost *cost_vec,
vect_cost_for_stmt kind = vector_stmt)
{
@@ -924,7 +924,7 @@ vect_model_simple_cost (vec_info *, int n, slp_tree node,
gcc_assert (cost_vec != NULL);
- n *= SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ n *= vect_get_num_copies (vinfo, node);
/* Pass the inside-of-loop statements to the target-specific cost model. */
inside_cost += record_stmt_cost (cost_vec, n, kind, node, 0, vect_body);
@@ -3120,7 +3120,7 @@ vectorizable_bswap (vec_info *vinfo,
record_stmt_cost (cost_vec,
1, vector_stmt, slp_node, 0, vect_prologue);
record_stmt_cost (cost_vec,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vect_get_num_copies (vinfo, slp_node),
vec_perm, slp_node, 0, vect_body);
return true;
}
@@ -3438,6 +3438,7 @@ vectorizable_call (vec_info *vinfo,
int len_opno = internal_fn_len_index (cond_len_fn);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
+ unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
if (cost_vec) /* transformation not required. */
{
for (i = 0; i < nargs; ++i)
@@ -3474,7 +3475,6 @@ vectorizable_call (vec_info *vinfo,
}
else
{
- unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
tree scalar_mask = NULL_TREE;
if (mask_opno >= 0)
scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
@@ -3531,7 +3531,7 @@ vectorizable_call (vec_info *vinfo,
/* Build argument list for the vectorized call. */
if (cfn == CFN_GOMP_SIMD_LANE)
{
- for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i)
+ for (i = 0; i < nvectors; ++i)
{
/* ??? For multi-lane SLP we'd need to build
{ 0, 0, .., 1, 1, ... }. */
@@ -5390,6 +5390,7 @@ vectorizable_conversion (vec_info *vinfo,
return false;
}
DUMP_VECT_SCOPE ("vectorizable_conversion");
+ unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
if (modifier == NONE)
{
SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
@@ -5400,7 +5401,6 @@ vectorizable_conversion (vec_info *vinfo,
{
SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
/* The final packing step produces one vector result per copy. */
- unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
vect_model_promotion_demotion_cost (slp_node, nvectors,
multi_step_cvt, cost_vec,
widen_arith);
@@ -5411,9 +5411,8 @@ vectorizable_conversion (vec_info *vinfo,
/* The initial unpacking step produces two vector results
per copy. MULTI_STEP_CVT is 0 for a single conversion,
so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
- unsigned int nvectors
- = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt;
- vect_model_promotion_demotion_cost (slp_node, nvectors,
+ vect_model_promotion_demotion_cost (slp_node,
+ nvectors >> multi_step_cvt,
multi_step_cvt, cost_vec,
widen_arith);
}
@@ -5943,7 +5942,7 @@ vectorizable_shift (vec_info *vinfo,
scalar shift operand but code-generation below simply always
takes the first. */
if (dt[1] == vect_internal_def
- && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ && maybe_ne (nunits_out * vect_get_num_copies (vinfo, slp_node),
stmts.length ()))
scalar_shift_arg = false;
@@ -6102,6 +6101,7 @@ vectorizable_shift (vec_info *vinfo,
/* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ unsigned nvectors = vect_get_num_copies (vinfo, slp_node);
if (scalar_shift_arg && dt[1] != vect_internal_def)
{
/* Vector shl and shr insn patterns can be defined with scalar
@@ -6115,15 +6115,14 @@ vectorizable_shift (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"operand 1 using scalar mode.\n");
vec_oprnd1 = op1;
- vec_oprnds1.create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ vec_oprnds1.create (nvectors);
vec_oprnds1.quick_push (vec_oprnd1);
- /* Store vec_oprnd1 for every vector stmt to be created.
- We check during the analysis that all the shift arguments
- are the same.
- TODO: Allow different constants for different vector
- stmts generated for an SLP instance. */
- for (k = 0;
- k < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) - 1; k++)
+ /* Store vec_oprnd1 for every vector stmt to be created.
+ We check during the analysis that all the shift arguments
+ are the same.
+ TODO: Allow different constants for different vector
+ stmts generated for an SLP instance. */
+ for (k = 0; k < nvectors - 1; k++)
vec_oprnds1.quick_push (vec_oprnd1);
}
}
@@ -6141,8 +6140,8 @@ vectorizable_shift (vec_info *vinfo,
gsi);
vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
gsi);
- vec_oprnds1.create (slp_node->vec_stmts_size);
- for (k = 0; k < slp_node->vec_stmts_size; k++)
+ vec_oprnds1.create (nvectors);
+ for (k = 0; k < nvectors; k++)
vec_oprnds1.quick_push (vec_oprnd1);
}
else if (dt[1] == vect_constant_def)
@@ -6393,7 +6392,7 @@ vectorizable_operation (vec_info *vinfo,
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. */
- auto vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ auto vec_num = vect_get_num_copies (vinfo, slp_node);
/* Reject attempts to combine mask types with nonmask types, e.g. if
we have an AND between a (nonmask) boolean loaded from memory and
@@ -6515,7 +6514,7 @@ vectorizable_operation (vec_info *vinfo,
in the prologue and (mis-)costs one of the stmts as
vector stmt. See below for the actual lowering that will
be applied. */
- unsigned n = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ unsigned n = vect_get_num_copies (vinfo, slp_node);
switch (code)
{
case PLUS_EXPR:
@@ -7855,11 +7854,12 @@ vectorizable_store (vec_info *vinfo,
}
else
vf = 1;
+ vec_num = vect_get_num_copies (vinfo, slp_node);
/* FORNOW. This restriction should be relaxed. */
if (loop
&& nested_in_vect_loop_p (loop, stmt_info)
- && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
+ && vec_num > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7949,7 +7949,7 @@ vectorizable_store (vec_info *vinfo,
{
first_stmt_info = stmt_info;
first_dr_info = dr_info;
- group_size = vec_num = 1;
+ group_size = 1;
}
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec)
@@ -8026,9 +8026,6 @@ vectorizable_store (vec_info *vinfo,
|| !nested_in_vect_loop_p (loop, stmt_info));
grouped_store = false;
- /* VEC_NUM is the number of vect stmts to be created for this
- group. */
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
|| (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info));
@@ -8201,7 +8198,7 @@ vectorizable_store (vec_info *vinfo,
if (nstores > 1)
align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
- int ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int ncopies = vec_num;
if (!costing_p)
{
@@ -8412,7 +8409,7 @@ vectorizable_store (vec_info *vinfo,
/* For costing some adjacent vector stores, we'd like to cost with
the total number of them once instead of cost each one by one. */
unsigned int n_adjacent_stores = 0;
- int ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) / group_size;
+ int ncopies = vec_num / group_size;
for (j = 0; j < ncopies; j++)
{
if (j == 0)
@@ -9398,9 +9395,10 @@ vectorizable_load (vec_info *vinfo,
else
vf = 1;
+ vec_num = vect_get_num_copies (vinfo, slp_node);
+
/* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop
- && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
+ if (nested_in_vect_loop && vec_num > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -9701,7 +9699,7 @@ vectorizable_load (vec_info *vinfo,
vectype, &gsi2);
}
gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
- for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
+ for (j = 0; j < (int) vec_num; ++j)
slp_node->push_vec_def (new_stmt);
return true;
}
@@ -9896,7 +9894,7 @@ vectorizable_load (vec_info *vinfo,
dr_chain.create (ncopies);
}
else
- ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ ncopies = vec_num;
unsigned int group_el = 0;
unsigned HOST_WIDE_INT
@@ -10071,7 +10069,7 @@ vectorizable_load (vec_info *vinfo,
/* We do not support grouped accesses in a nested loop,
instead the access is contiguous but it might be
permuted. No gap adjustment is needed though. */
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ ;
else if (slp_perm
&& (group_size != scalar_lanes
|| !multiple_p (nunits, group_size)))
@@ -10085,7 +10083,6 @@ vectorizable_load (vec_info *vinfo,
}
else
{
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
group_gap_adj = group_size - scalar_lanes;
}
@@ -10095,10 +10092,9 @@ vectorizable_load (vec_info *vinfo,
{
first_stmt_info = stmt_info;
first_dr_info = dr_info;
- group_size = vec_num = 1;
+ group_size = 1;
group_gap_adj = 0;
ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
vec_loop_masks *loop_masks
@@ -10267,7 +10263,7 @@ vectorizable_load (vec_info *vinfo,
/* For costing some adjacent vector loads, we'd like to cost with
the total number of them once instead of cost each one by one. */
unsigned int n_adjacent_loads = 0;
- int ncopies = slp_node->vec_stmts_size / group_size;
+ int ncopies = vec_num / group_size;
for (j = 0; j < ncopies; j++)
{
if (costing_p)
@@ -11600,7 +11596,7 @@ vectorizable_condition (vec_info *vinfo,
tree vectype = SLP_TREE_VECTYPE (slp_node);
tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
- int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int vec_num = vect_get_num_copies (vinfo, slp_node);
cond_expr = gimple_assign_rhs1 (stmt);
gcc_assert (! COMPARISON_CLASS_P (cond_expr));
@@ -12332,7 +12328,7 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
return false;
machine_mode mode = TYPE_MODE (vectype);
- int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ int vec_num = vect_get_num_copies (loop_vinfo, slp_node);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index d017d0aa553..46d192e0bc8 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -330,11 +330,6 @@ struct _slp_tree {
tree vectype;
/* Vectorized defs. */
vec<tree> vec_defs;
- /* Number of vector stmts that are created to replace the group of scalar
- stmts. It is calculated during the transformation phase as the number of
- scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
- divided by vector size. */
- unsigned int vec_stmts_size;
/* Reference count in the SLP graph. */
unsigned int refcnt;
@@ -444,7 +439,6 @@ public:
#define SLP_TREE_SCALAR_OPS(S) (S)->ops
#define SLP_TREE_REF_COUNT(S) (S)->refcnt
#define SLP_TREE_VEC_DEFS(S) (S)->vec_defs
-#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
#define SLP_TREE_LANE_PERMUTATION(S) (S)->lane_permutation
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
--
2.51.0