Richard Biener <rguent...@suse.de> writes:
> The following starts to handle NULL elements in SLP_TREE_SCALAR_STMTS
> with the first candidate being the two-operator nodes where some
> lanes are do-not-care and also do not have a scalar stmt computing
> the result.  I originally added SLP_TREE_SCALAR_STMTS to two-operator
> nodes but this exposes PR115764, so I've split that out.
>
> I have a patch use NULL elements for loads from groups with gaps
> where we get around not doing that by having a load permutation.
>
> I'm currently re-bootstrapping and testing this, it passed multiple
> testing rounds before (with the two-operator change).

Ah, so this might answer a question I was going to ask later,
when I actually had time to do something with the answer.

For SVE:

        short *s;

        s[0] += 1;
        s[2] += 2;
        s[4] += 3;
        s[6] += 4;

(contrived example) could use something like (little-endian):

        index   z1.s, #1, #1
        ptrue   p0.s, vl4
        ldrh    z0.h, p0/z, [s]
        add     z0.h, z0.h, z1.h
        strh    z0.h, p0, [s]

where the .h predicate is 1,0,1,0,1,0,1,0,0s...  The question was going
to be how we should represent that in SLP, but I guess the answer is
simply to create an 8 (or 7?) element SLP tree and fill in the blanks
with nulls?

Thanks,
Richard

>
> Richard.
>
>       * tree-vect-slp.cc (bst_traits::hash): Handle NULL elements
>       in SLP_TREE_SCALAR_STMTS.
>       (vect_print_slp_tree): Likewise.
>       (vect_mark_slp_stmts): Likewise.
>       (vect_mark_slp_stmts_relevant): Likewise.
>       (vect_find_last_scalar_stmt_in_slp): Likewise.
>       (vect_bb_slp_mark_live_stmts): Likewise.
>       (vect_slp_prune_covered_roots): Likewise.
>       (vect_bb_partition_graph_r): Likewise.
>       (vect_remove_slp_scalar_calls): Likewise.
>       (vect_slp_gather_vectorized_scalar_stmts): Likewise.
>       (vect_bb_slp_scalar_cost): Likewise.
>       (vect_contains_pattern_stmt_p): Likewise.
>       (vect_slp_convert_to_external): Likewise.
>       (vect_find_first_scalar_stmt_in_slp): Likewise.
>       (vect_optimize_slp_pass::remove_redundant_permutations): Likewise.
>       (vect_slp_analyze_node_operations_1): Likewise.
>       (vect_schedule_slp_node): Likewise.
>       * tree-vect-stmts.cc (can_vectorize_live_stmts): Likewise.
>       (vectorizable_shift): Likewise.
>       * tree-vect-data-refs.cc (vect_slp_analyze_load_dependences):
>       Handle NULL elements in SLP_TREE_SCALAR_STMTS.
> ---
>  gcc/tree-vect-data-refs.cc |  2 +
>  gcc/tree-vect-slp.cc       | 76 +++++++++++++++++++++++---------------
>  gcc/tree-vect-stmts.cc     | 22 ++++++-----
>  3 files changed, 61 insertions(+), 39 deletions(-)
>
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 959e127c385..39fd887a96b 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -1041,6 +1041,8 @@ vect_slp_analyze_load_dependences (vec_info *vinfo, 
> slp_tree node,
>  
>    for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
>      {
> +      if (! SLP_TREE_SCALAR_STMTS (node)[k])
> +     continue;
>        stmt_vec_info access_info
>       = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
>        if (access_info == first_access_info)
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index b060161c021..7a9aa86f517 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -356,7 +356,7 @@ vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
>    stmt_vec_info stmt_info;
>    unsigned int i;
>    FOR_EACH_VEC_ELT (stmts, i, stmt_info)
> -    if (is_pattern_stmt_p (stmt_info))
> +    if (stmt_info && is_pattern_stmt_p (stmt_info))
>        return true;
>    return false;
>  }
> @@ -1592,7 +1592,7 @@ bst_traits::hash (value_type x)
>  {
>    inchash::hash h;
>    for (unsigned i = 0; i < x.length (); ++i)
> -    h.add_int (gimple_uid (x[i]->stmt));
> +    h.add_int (x[i] ? gimple_uid (x[i]->stmt) : -1);
>    return h.end ();
>  }
>  inline bool
> @@ -2801,9 +2801,12 @@ vect_print_slp_tree (dump_flags_t dump_kind, 
> dump_location_t loc,
>      }
>    if (SLP_TREE_SCALAR_STMTS (node).exists ())
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> -      dump_printf_loc (metadata, user_loc, "\t%sstmt %u %G",
> -                    STMT_VINFO_LIVE_P (stmt_info) ? "[l] " : "",
> -                    i, stmt_info->stmt);
> +      if (stmt_info)
> +     dump_printf_loc (metadata, user_loc, "\t%sstmt %u %G",
> +                      STMT_VINFO_LIVE_P (stmt_info) ? "[l] " : "",
> +                      i, stmt_info->stmt);
> +      else
> +     dump_printf_loc (metadata, user_loc, "\tstmt %u ---\n", i);
>    else
>      {
>        dump_printf_loc (metadata, user_loc, "\t{ ");
> @@ -2944,7 +2947,8 @@ vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> 
> &visited)
>      return;
>  
>    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> -    STMT_SLP_TYPE (stmt_info) = pure_slp;
> +    if (stmt_info)
> +      STMT_SLP_TYPE (stmt_info) = pure_slp;
>  
>    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>      if (child)
> @@ -2974,11 +2978,12 @@ vect_mark_slp_stmts_relevant (slp_tree node, 
> hash_set<slp_tree> &visited)
>      return;
>  
>    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> -    {
> -      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
> -                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
> -      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
> -    }
> +    if (stmt_info)
> +      {
> +     gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
> +                 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
> +     STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
> +      }
>  
>    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>      if (child)
> @@ -3029,10 +3034,11 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node)
>    stmt_vec_info stmt_vinfo;
>  
>    for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
> -    {
> -      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
> -      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
> -    }
> +    if (stmt_vinfo)
> +      {
> +     stmt_vinfo = vect_orig_stmt (stmt_vinfo);
> +     last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
> +      }
>  
>    return last;
>  }
> @@ -3046,12 +3052,13 @@ vect_find_first_scalar_stmt_in_slp (slp_tree node)
>    stmt_vec_info stmt_vinfo;
>  
>    for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
> -    {
> -      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
> -      if (!first
> -       || get_later_stmt (stmt_vinfo, first) == first)
> -     first = stmt_vinfo;
> -    }
> +    if (stmt_vinfo)
> +      {
> +     stmt_vinfo = vect_orig_stmt (stmt_vinfo);
> +     if (!first
> +         || get_later_stmt (stmt_vinfo, first) == first)
> +       first = stmt_vinfo;
> +      }
>  
>    return first;
>  }
> @@ -6211,6 +6218,7 @@ vect_optimize_slp_pass::remove_redundant_permutations ()
>           {
>             if (j != 0
>                 && (next_load_info != load_info
> +                   || ! load_info
>                     || DR_GROUP_GAP (load_info) != 1))
>               {
>                 subchain_p = false;
> @@ -6827,7 +6835,8 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, 
> slp_tree node,
>        unsigned int i;
>        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, slp_stmt_info)
>       {
> -       if (STMT_VINFO_LIVE_P (slp_stmt_info)
> +       if (slp_stmt_info
> +           && STMT_VINFO_LIVE_P (slp_stmt_info)
>             && !vectorizable_live_operation (vinfo, slp_stmt_info, node,
>                                              node_instance, i,
>                                              false, cost_vec))
> @@ -6859,6 +6868,10 @@ vect_slp_convert_to_external (vec_info *vinfo, 
> slp_tree node,
>        || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
>      return false;
>  
> +  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> +    if (!stmt_info)
> +      return false;
> +
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
>                    "Building vector operands of %p from scalars instead\n",
> @@ -7217,7 +7230,7 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, 
> slp_tree node,
>    stmt_vec_info last_stmt = vect_find_last_scalar_stmt_in_slp (node);
>    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
>      {
> -      if (svisited.contains (stmt_info))
> +      if (!stmt_info || svisited.contains (stmt_info))
>       continue;
>        stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
>        if (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)
> @@ -7406,7 +7419,8 @@ vect_slp_prune_covered_roots (slp_tree node, 
> hash_set<stmt_vec_info> &roots,
>    stmt_vec_info stmt;
>    unsigned i;
>    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> -    roots.remove (vect_orig_stmt (stmt));
> +    if (stmt)
> +      roots.remove (vect_orig_stmt (stmt));
>  
>    slp_tree child;
>    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> @@ -7582,8 +7596,9 @@ vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
>    unsigned i;
>  
>    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> -    vect_map_to_instance (instance, stmt_info, stmt_to_instance,
> -                       instance_leader);
> +    if (stmt_info)
> +      vect_map_to_instance (instance, stmt_info, stmt_to_instance,
> +                         instance_leader);
>  
>    if (vect_map_to_instance (instance, node, node_to_instance,
>                           instance_leader))
> @@ -7651,7 +7666,8 @@ vect_slp_gather_vectorized_scalar_stmts (vec_info 
> *vinfo, slp_tree node,
>    if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
>      {
>        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> -     vstmts.add (stmt_info);
> +     if (stmt_info)
> +       vstmts.add (stmt_info);
>  
>        FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>       if (child)
> @@ -7691,7 +7707,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
>        ssa_op_iter op_iter;
>        def_operand_p def_p;
>  
> -      if ((*life)[i])
> +      if (!stmt_info || (*life)[i])
>       continue;
>  
>        stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
> @@ -10102,7 +10118,7 @@ vect_schedule_slp_node (vec_info *vinfo,
>        stmt_vec_info slp_stmt_info;
>        unsigned int i;
>        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, slp_stmt_info)
> -     if (STMT_VINFO_LIVE_P (slp_stmt_info))
> +     if (slp_stmt_info && STMT_VINFO_LIVE_P (slp_stmt_info))
>         {
>           done = vectorizable_live_operation (vinfo, slp_stmt_info, node,
>                                               instance, i, true, NULL);
> @@ -10146,6 +10162,8 @@ vect_remove_slp_scalar_calls (vec_info *vinfo,
>  
>    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
>      {
> +      if (!stmt_info)
> +     continue;
>        gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
>        if (!stmt || gimple_bb (stmt) == NULL)
>       continue;
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index b12b6ada029..742517a81bd 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -6191,11 +6191,12 @@ vectorizable_shift (vec_info *vinfo,
>         stmt_vec_info slpstmt_info;
>  
>         FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
> -         {
> -           gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
> -           if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
> -             scalar_shift_arg = false;
> -         }
> +         if (slpstmt_info)
> +           {
> +             gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
> +             if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
> +               scalar_shift_arg = false;
> +           }
>  
>         /* For internal SLP defs we have to make sure we see scalar stmts
>            for all vector elements.
> @@ -13095,11 +13096,12 @@ can_vectorize_live_stmts (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>        unsigned int i;
>        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
>       {
> -       if ((STMT_VINFO_LIVE_P (slp_stmt_info)
> -            || (loop_vinfo
> -                && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> -                && STMT_VINFO_DEF_TYPE (slp_stmt_info)
> -                     == vect_induction_def))
> +       if (slp_stmt_info
> +           && (STMT_VINFO_LIVE_P (slp_stmt_info)
> +               || (loop_vinfo
> +                   && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> +                   && STMT_VINFO_DEF_TYPE (slp_stmt_info)
> +                   == vect_induction_def))
>             && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
>                                              slp_node_instance, i,
>                                              vec_stmt_p, cost_vec))

Reply via email to