Richard Biener <rguent...@suse.de> writes:
> This performs local re-computation of participating scalar stmts
> in BB vectorization subgraphs to allow precise computation of
> liveness of scalar stmts after vectorization and thus precise
> costing.  This treats all extern defs as live but continues
> to optimistically handle scalar defs that we think we can handle
> by lane-extraction even though that can still fail late during
> code-generation.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Any comments?

LGTM.

> Thanks,
> Richard.
>
> 2021-09-02  Richard Biener  <rguent...@suse.de>
>
>       PR tree-optimization/102176
>       * tree-vect-slp.c (vect_slp_gather_vectorized_scalar_stmts):
>       New function.
>       (vect_bb_slp_scalar_cost): Use the computed set of
>       vectorized scalar stmts instead of relying on the out-of-date
>       and not accurate PURE_SLP_STMT.
>       (vect_bb_vectorization_profitable_p): Compute the set
>       of vectorized scalar stmts.
> ---
>  gcc/tree-vect-slp.c | 69 +++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 64 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index fa3566f3d06..024a1c38a23 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -5104,6 +5104,42 @@ vect_bb_partition_graph (bb_vec_info bb_vinfo)
>      }
>  }
>  
> +/* Compute the set of scalar stmts participating in internal and external
> +   nodes.  */
> +
> +static void
> +vect_slp_gather_vectorized_scalar_stmts (vec_info *vinfo, slp_tree node,
> +                                      hash_set<slp_tree> &visited,
> +                                      hash_set<stmt_vec_info> &vstmts,
> +                                      hash_set<stmt_vec_info> &estmts)
> +{
> +  int i;
> +  stmt_vec_info stmt_info;
> +  slp_tree child;
> +
> +  if (visited.add (node))
> +    return;

Probably said this before, but it's confusing that hash_set::add
returns true if it did nothing, whereas bitmap_set_bit returns
false if it did nothing.  (The bitmap version seems more natural IMO.)

Not a problem with this patch of course. :-)

Richard

> +
> +  if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
> +    {
> +      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> +     vstmts.add (stmt_info);
> +
> +      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (child)
> +       vect_slp_gather_vectorized_scalar_stmts (vinfo, child, visited,
> +                                                vstmts, estmts);
> +    }
> +  else
> +    for (tree def : SLP_TREE_SCALAR_OPS (node))
> +      {
> +     stmt_vec_info def_stmt = vinfo->lookup_def (def);
> +     if (def_stmt)
> +       estmts.add (def_stmt);
> +      }
> +}
> +
> +
>  /* Compute the scalar cost of the SLP node NODE and its children
>     and return it.  Do not account defs that are marked in LIFE and
>     update LIFE according to uses of NODE.  */
> @@ -5112,6 +5148,7 @@ static void
>  vect_bb_slp_scalar_cost (vec_info *vinfo,
>                        slp_tree node, vec<bool, va_heap> *life,
>                        stmt_vector_for_cost *cost_vec,
> +                      hash_set<stmt_vec_info> &vectorized_scalar_stmts,
>                        hash_set<slp_tree> &visited)
>  {
>    unsigned i;
> @@ -5148,8 +5185,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
>                 {
>                   stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
>                   if (!use_stmt_info
> -                     || !PURE_SLP_STMT
> -                           (vect_stmt_to_vectorize (use_stmt_info)))
> +                     || !vectorized_scalar_stmts.contains (use_stmt_info))
>                     {
>                       (*life)[i] = true;
>                       break;
> @@ -5212,7 +5248,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
>             subtree_life.safe_splice (*life);
>           }
>         vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
> -                                visited);
> +                                vectorized_scalar_stmts, visited);
>         subtree_life.truncate (0);
>       }
>      }
> @@ -5254,11 +5290,33 @@ vect_bb_vectorization_profitable_p (bb_vec_info 
> bb_vinfo,
>                             SLP_INSTANCE_TREE (instance), visited);
>      }
>  
> +  /* Compute the set of scalar stmts we know will go away 'locally' when
> +     vectorizing.  This used to be tracked with just PURE_SLP_STMT but that's
> +     not accurate for nodes promoted extern late or for scalar stmts that
> +     are used both in extern defs and in vectorized defs.  */
> +  hash_set<stmt_vec_info> vectorized_scalar_stmts;
> +  hash_set<stmt_vec_info> scalar_stmts_in_externs;
> +  hash_set<slp_tree> visited;
> +  FOR_EACH_VEC_ELT (slp_instances, i, instance)
> +    {
> +      vect_slp_gather_vectorized_scalar_stmts (bb_vinfo,
> +                                            SLP_INSTANCE_TREE (instance),
> +                                            visited,
> +                                            vectorized_scalar_stmts,
> +                                            scalar_stmts_in_externs);
> +      for (stmt_vec_info rstmt : SLP_INSTANCE_ROOT_STMTS (instance))
> +     vectorized_scalar_stmts.add (rstmt);
> +    }
> +  /* Scalar stmts used as defs in external nodes need to be preseved, so
> +     remove them from vectorized_scalar_stmts.  */
> +  for (stmt_vec_info stmt : scalar_stmts_in_externs)
> +    vectorized_scalar_stmts.remove (stmt);
> +
>    /* Calculate scalar cost and sum the cost for the vector stmts
>       previously collected.  */
>    stmt_vector_for_cost scalar_costs = vNULL;
>    stmt_vector_for_cost vector_costs = vNULL;
> -  hash_set<slp_tree> visited;
> +  visited.empty ();
>    FOR_EACH_VEC_ELT (slp_instances, i, instance)
>      {
>        auto_vec<bool, 20> life;
> @@ -5271,7 +5329,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info 
> bb_vinfo,
>                         SLP_INSTANCE_ROOT_STMTS (instance)[0], 0, vect_body);
>        vect_bb_slp_scalar_cost (bb_vinfo,
>                              SLP_INSTANCE_TREE (instance),
> -                            &life, &scalar_costs, visited);
> +                            &life, &scalar_costs, vectorized_scalar_stmts,
> +                            visited);
>        vector_costs.safe_splice (instance->cost_vec);
>        instance->cost_vec.release ();
>      }

Reply via email to