Richard Biener <rguent...@suse.de> writes: > This performs local re-computation of participating scalar stmts > in BB vectorization subgraphs to allow precise computation of > liveness of scalar stmts after vectorization and thus precise > costing. This treats all extern defs as live but continues > to optimistically handle scalar defs that we think we can handle > by lane-extraction even though that can still fail late during > code-generation. > > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. > > Any comments?
LGTM. > Thanks, > Richard. > > 2021-09-02 Richard Biener <rguent...@suse.de> > > PR tree-optimization/102176 > * tree-vect-slp.c (vect_slp_gather_vectorized_scalar_stmts): > New function. > (vect_bb_slp_scalar_cost): Use the computed set of > vectorized scalar stmts instead of relying on the out-of-date > and not accurate PURE_SLP_STMT. > (vect_bb_vectorization_profitable_p): Compute the set > of vectorized scalar stmts. > --- > gcc/tree-vect-slp.c | 69 +++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 64 insertions(+), 5 deletions(-) > > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index fa3566f3d06..024a1c38a23 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -5104,6 +5104,42 @@ vect_bb_partition_graph (bb_vec_info bb_vinfo) > } > } > > +/* Compute the set of scalar stmts participating in internal and external > + nodes. */ > + > +static void > +vect_slp_gather_vectorized_scalar_stmts (vec_info *vinfo, slp_tree node, > + hash_set<slp_tree> &visited, > + hash_set<stmt_vec_info> &vstmts, > + hash_set<stmt_vec_info> &estmts) > +{ > + int i; > + stmt_vec_info stmt_info; > + slp_tree child; > + > + if (visited.add (node)) > + return; Probably said this before, but it's confusing that hash_set::add returns true if it did nothing, whereas bitmap_set_bit returns false if it did nothing. (The bitmap version seems more natural IMO.) Not a problem with this patch of course. :-) Richard > + > + if (SLP_TREE_DEF_TYPE (node) == vect_internal_def) > + { > + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) > + vstmts.add (stmt_info); > + > + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) > + if (child) > + vect_slp_gather_vectorized_scalar_stmts (vinfo, child, visited, > + vstmts, estmts); > + } > + else > + for (tree def : SLP_TREE_SCALAR_OPS (node)) > + { > + stmt_vec_info def_stmt = vinfo->lookup_def (def); > + if (def_stmt) > + estmts.add (def_stmt); > + } > +} > + > + > /* Compute the scalar cost of the SLP node NODE and its children > and return it. Do not account defs that are marked in LIFE and > update LIFE according to uses of NODE. */ > @@ -5112,6 +5148,7 @@ static void > vect_bb_slp_scalar_cost (vec_info *vinfo, > slp_tree node, vec<bool, va_heap> *life, > stmt_vector_for_cost *cost_vec, > + hash_set<stmt_vec_info> &vectorized_scalar_stmts, > hash_set<slp_tree> &visited) > { > unsigned i; > @@ -5148,8 +5185,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo, > { > stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt); > if (!use_stmt_info > - || !PURE_SLP_STMT > - (vect_stmt_to_vectorize (use_stmt_info))) > + || !vectorized_scalar_stmts.contains (use_stmt_info)) > { > (*life)[i] = true; > break; > @@ -5212,7 +5248,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo, > subtree_life.safe_splice (*life); > } > vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec, > - visited); > + vectorized_scalar_stmts, visited); > subtree_life.truncate (0); > } > } > @@ -5254,11 +5290,33 @@ vect_bb_vectorization_profitable_p (bb_vec_info > bb_vinfo, > SLP_INSTANCE_TREE (instance), visited); > } > > + /* Compute the set of scalar stmts we know will go away 'locally' when > + vectorizing. This used to be tracked with just PURE_SLP_STMT but that's > + not accurate for nodes promoted extern late or for scalar stmts that > + are used both in extern defs and in vectorized defs. */ > + hash_set<stmt_vec_info> vectorized_scalar_stmts; > + hash_set<stmt_vec_info> scalar_stmts_in_externs; > + hash_set<slp_tree> visited; > + FOR_EACH_VEC_ELT (slp_instances, i, instance) > + { > + vect_slp_gather_vectorized_scalar_stmts (bb_vinfo, > + SLP_INSTANCE_TREE (instance), > + visited, > + vectorized_scalar_stmts, > + scalar_stmts_in_externs); > + for (stmt_vec_info rstmt : SLP_INSTANCE_ROOT_STMTS (instance)) > + vectorized_scalar_stmts.add (rstmt); > + } > + /* Scalar stmts used as defs in external nodes need to be preseved, so > + remove them from vectorized_scalar_stmts. */ > + for (stmt_vec_info stmt : scalar_stmts_in_externs) > + vectorized_scalar_stmts.remove (stmt); > + > /* Calculate scalar cost and sum the cost for the vector stmts > previously collected. */ > stmt_vector_for_cost scalar_costs = vNULL; > stmt_vector_for_cost vector_costs = vNULL; > - hash_set<slp_tree> visited; > + visited.empty (); > FOR_EACH_VEC_ELT (slp_instances, i, instance) > { > auto_vec<bool, 20> life; > @@ -5271,7 +5329,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info > bb_vinfo, > SLP_INSTANCE_ROOT_STMTS (instance)[0], 0, vect_body); > vect_bb_slp_scalar_cost (bb_vinfo, > SLP_INSTANCE_TREE (instance), > - &life, &scalar_costs, visited); > + &life, &scalar_costs, vectorized_scalar_stmts, > + visited); > vector_costs.safe_splice (instance->cost_vec); > instance->cost_vec.release (); > }