Robin Dapp <rd...@linux.ibm.com> writes:
> Hi,
>
> as discussed in
> https://gcc.gnu.org/pipermail/gcc-patches/2021-October/582627.html this
> introduces a bias parameter for the len_load/len_store ifns as well as
> optabs that is meant to distinguish between Power and s390 variants.
> The default is a bias of 0, while in s390's case vll/vstl do not support
> lengths of zero bytes and a bias of -1 should be used.
>
> Bootstrapped and regtested on Power9 (--with-cpu=power9) and s390
> (--with-arch=z15).
>
> The tiny changes in the Power backend I will post separately.

Some comments in addition to what Kewen said:

> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 8312d08aab2..993e32c1854 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
>  static void
>  expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
> -  class expand_operand ops[3];
> -  tree type, lhs, rhs, maskt;
> -  rtx mem, target, mask;
> +  class expand_operand ops[4];
> +  tree type, lhs, rhs, maskt, biast;
> +  rtx mem, target, mask, bias;
>    insn_code icode;
>  
>    maskt = gimple_call_arg (stmt, 2);
> @@ -2727,7 +2727,16 @@ expand_partial_load_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
>                                TYPE_UNSIGNED (TREE_TYPE (maskt)));
>    else
>      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (icode, 3, ops);
> +  if (optab == len_load_optab)
> +    {
> +      biast = gimple_call_arg (stmt, 3);
> +      bias = expand_normal (biast);
> +      create_input_operand (&ops[3], bias, QImode);
> +      expand_insn (icode, 4, ops);
> +    }
> +  else
> +    expand_insn (icode, 3, ops);
> +

The previous “if“ is also for len_load_optab, so it seems better to
combine the two.

>    if (!rtx_equal_p (target, ops[0].value))
>      emit_move_insn (target, ops[0].value);
>  }
> @@ -2741,9 +2750,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
> convert_optab optab)
>  static void
>  expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
> -  class expand_operand ops[3];
> -  tree type, lhs, rhs, maskt;
> -  rtx mem, reg, mask;
> +  class expand_operand ops[4];
> +  tree type, lhs, rhs, maskt, biast;
> +  rtx mem, reg, mask, bias;
>    insn_code icode;
>  
>    maskt = gimple_call_arg (stmt, 2);
> @@ -2770,7 +2779,16 @@ expand_partial_store_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
>                                TYPE_UNSIGNED (TREE_TYPE (maskt)));
>    else
>      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (icode, 3, ops);
> +
> +  if (optab == len_store_optab)
> +    {
> +      biast = gimple_call_arg (stmt, 4);
> +      bias = expand_normal (biast);
> +      create_input_operand (&ops[3], bias, QImode);
> +      expand_insn (icode, 4, ops);
> +    }
> +  else
> +    expand_insn (icode, 3, ops);

Same idea here.

>  }
>  
>  #define expand_mask_store_optab_fn expand_partial_store_optab_fn
> @@ -4172,6 +4190,30 @@ internal_check_ptrs_fn_supported_p (internal_fn ifn, 
> tree type,
>         && insn_operand_matches (icode, 4, GEN_INT (align)));
>  }
>  
> +/* Return the supported bias for the len_load IFN.  For now we support a
> +   default bias of 0 and -1 in case 0 is not an allowable length for 
> len_load.

Neither bias is really the default.  How about just “For now we only
support the biases 0 and -1.”?

> +   If none of these biases match what the backend provides, return
> +   VECT_PARTIAL_BIAS_UNSUPPORTED.  */
> +
> +signed char
> +internal_len_load_bias_supported (internal_fn ifn, machine_mode mode)

Since this is now providing the bias rather than asking about a
given bias (thanks), I think we should drop “_supported”.

> +{
> +  optab optab = direct_internal_fn_optab (ifn);
> +  insn_code icode = direct_optab_handler (optab, mode);
> +
> +  if (icode != CODE_FOR_nothing)
> +    {
> +      /* We only support a bias of 0 (default) or -1.  Try both
> +      of them.  */
> +      if (insn_operand_matches (icode, 3, GEN_INT (0)))
> +     return 0;
> +      else if (insn_operand_matches (icode, 3, GEN_INT (-1)))

Minor nit, but: redundant else.

> +     return -1;
> +    }
> +
> +  return VECT_PARTIAL_BIAS_UNSUPPORTED;
> +}
> +
>  /* Expand STMT as though it were a call to internal function FN.  */
>  
>  void
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 19d0f849a5a..af28cf0d566 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -227,6 +227,10 @@ extern bool internal_gather_scatter_fn_supported_p 
> (internal_fn, tree,
>                                                   tree, tree, int);
>  extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
>                                               poly_uint64, unsigned int);
> +#define VECT_PARTIAL_BIAS_UNSUPPORTED 127
> +
> +extern signed char internal_len_load_bias_supported (internal_fn ifn,
> +                                                  machine_mode);
>  
>  extern void expand_addsub_overflow (location_t, tree_code, tree, tree, tree,
>                                   bool, bool, bool, bool, tree *);
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index e94356d76e9..cd2c33fc4a7 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -1163,6 +1163,15 @@ vect_verify_loop_lens (loop_vec_info loop_vinfo)
>    if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
>      return false;
>  
> +  opt_machine_mode len_load_mode = get_len_load_store_mode
> +    (loop_vinfo->vector_mode, false);
> +  /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
> +     len_loads with a length of zero.  In order to avoid that we prohibit
> +     more than one loop length here.  */
> +  if (internal_len_load_bias_supported (IFN_LEN_LOAD, len_load_mode.require 
> ())
> +      == -1 && LOOP_VINFO_LENS (loop_vinfo).length () > 1)

Might as well add the require () to the get_len_load_store_mode call.

As Kewen says, it would be good to store the bias computed here in the
loop_vec_info and assert that all users agree.

If we do that, then there's also the option of adding the bias
to the lengths in vect_set_loop_controls_directly.

> +      return false;
> +
>    unsigned int max_nitems_per_iter = 1;
>    unsigned int i;
>    rgroup_controls *rgl;
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 17849b575b7..c3df26c8009 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -8289,9 +8289,30 @@ vectorizable_store (vec_info *vinfo,
>                                                  gsi);
>                     vec_oprnd = var;
>                   }
> +
> +               /* Check which bias value to use.  Default is 0.
> +                  A bias of -1 means that we cannot emit a LEN_LOAD with
> +                  a length of 0 and need to subtract 1 from the length.  */
> +               char biasval = internal_len_load_bias_supported
> +                 (IFN_LEN_STORE, new_vmode);
> +               tree bias = build_int_cst (intQI_type_node, biasval);
> +               tree new_len = final_len;
> +               if (biasval != 0
> +                   && biasval != VECT_PARTIAL_BIAS_UNSUPPORTED)
> +                 {
> +                   new_len = make_ssa_name (TREE_TYPE (final_len));
> +                   gassign *m1
> +                     = gimple_build_assign (new_len, MINUS_EXPR,
> +                                            final_len,
> +                                            build_one_cst (TREE_TYPE
> +                                                       (final_len)));
> +                   vect_finish_stmt_generation (vinfo, stmt_info, m1,
> +                                                gsi);
> +                 }

If we don't do that and add the bias at the use site instead,
then it'd be good to split this out into a subroutine so that
the load and store code can share it.

Thanks,
Richard

>                 gcall *call
> -                 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
> -                                               ptr, final_len, vec_oprnd);
> +                 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
> +                                               ptr, new_len, vec_oprnd,
> +                                               bias);
>                 gimple_call_set_nothrow (call, true);
>                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
>                 new_stmt = call;
> @@ -9588,24 +9609,46 @@ vectorizable_load (vec_info *vinfo,
>                                              vec_num * j + i);
>                       tree ptr = build_int_cst (ref_type,
>                                                 align * BITS_PER_UNIT);
> +
> +                     machine_mode vmode = TYPE_MODE (vectype);
> +                     opt_machine_mode new_ovmode
> +                       = get_len_load_store_mode (vmode, true);
> +                     machine_mode new_vmode = new_ovmode.require ();
> +                     tree qi_type = unsigned_intQI_type_node;
> +                     tree new_vtype
> +                       = build_vector_type_for_mode (qi_type, new_vmode);
> +
> +                     /* Check which bias value to use.  Default is 0. */
> +                     char biasval = internal_len_load_bias_supported
> +                       (IFN_LEN_LOAD, new_vmode);
> +
> +                     tree bias = build_int_cst (intQI_type_node, biasval);
> +                     tree new_len = final_len;
> +                     if (biasval != 0
> +                         && biasval != VECT_PARTIAL_BIAS_UNSUPPORTED)
> +                       {
> +                         new_len = make_ssa_name (TREE_TYPE (final_len));
> +                         gassign *m1 = gimple_build_assign (new_len,
> +                                                            MINUS_EXPR,
> +                                                            final_len,
> +                                                            build_one_cst
> +                                                            (TREE_TYPE
> +                                                             (final_len)));
> +                         vect_finish_stmt_generation (vinfo, stmt_info, m1,
> +                                                      gsi);
> +                       }
> +
>                       gcall *call
> -                       = gimple_build_call_internal (IFN_LEN_LOAD, 3,
> +                       = gimple_build_call_internal (IFN_LEN_LOAD, 4,
>                                                       dataref_ptr, ptr,
> -                                                     final_len);
> +                                                     new_len, bias);
>                       gimple_call_set_nothrow (call, true);
>                       new_stmt = call;
>                       data_ref = NULL_TREE;
>  
>                       /* Need conversion if it's wrapped with VnQI.  */
> -                     machine_mode vmode = TYPE_MODE (vectype);
> -                     opt_machine_mode new_ovmode
> -                       = get_len_load_store_mode (vmode, true);
> -                     machine_mode new_vmode = new_ovmode.require ();
>                       if (vmode != new_vmode)
>                         {
> -                         tree qi_type = unsigned_intQI_type_node;
> -                         tree new_vtype
> -                           = build_vector_type_for_mode (qi_type, new_vmode);
>                           tree var = vect_get_new_ssa_name (new_vtype,
>                                                             vect_simple_var);
>                           gimple_set_lhs (call, var);

Reply via email to