On Thu, Oct 30, 2025 at 2:20 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> This patch adjusts vect_gather_scatter_fn_p to always check an offset
> type with swapped signedness (vs. the original offset argument).
> If the target supports the gather/scatter with the new offset type, as
> well as the conversion of the offset, we now emit an explicit offset
> conversion before the actual gather/scatter.
>
> The relaxation is only done for the IFN path of gather/scatter and the
> general idea roughly looks like:
>
>   - vect_gather_scatter_fn_p builds a list of all offset vector types
>   that the target supports for the current vectype.  Then it goes
>   through that list, trying direct support first and sign-swapped
>   offset types next, taking precision requirements into account.
>   If successful, it sets supported_offset_vectype to the type that
>   actually worked, while offset_vectype_out is the type that was
>   requested.
>   - vect_check_gather_scatter works as before but uses the relaxed
>   vect_gather_scatter_fn_p.
>   - get_load_store_type sets ls_data->supported_offset_vectype if the
>   requested type wasn't supported but another one was.
>   - check_load_store_for_partial_vectors uses the
>   supported_offset_vectype in order to validate what get_load_store_type
>   determined.
>   - vectorizable_load/store emit a conversion (and cost it) if
>   ls_data->supported_offset_vectype is nonzero; see the sketch below.
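> 
>   A rough sketch of what this looks like in GIMPLE (not taken from the
>   patch; vector type, scale, and argument order are made up for
>   illustration):
> 
>     vect_off_1 = (vector(4) long int) vect_off_0;  /* sign-swap/widen  */
>     vect_res_2 = .MASK_GATHER_LOAD (base_3, vect_off_1, 8, mask_4, else_5);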
>
> The sign-swapped offset type is either of pointer size (if we started
> with a signed offset) or twice the size of the original offset (when
> that one was unsigned), capped at pointer size.
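> 
> For example, assuming a hypothetical target that only supports signed
> 64-bit gather offsets, the unsigned 32-bit index below would be
> zero-extended to a 64-bit offset vector before the gather:
> 
>   void
>   f (double *restrict dst, double *restrict src,
>      unsigned int *restrict idx, int n)
>   {
>     for (int i = 0; i < n; i++)
>       dst[i] = src[idx[i]];
>   }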
>
> The main change from v2 is Richi's idea of reworking
> vect_gather_scatter_fn_p.  It now first collects the offset vectypes
> the target supports and then filters them for direct and indirect
> (sign-swap) support.  I think the logic is clearer and more
> straightforward now.
>
> I guess it's debatable whether to include the IFN and else value in the
> configuration or not, as they don't usually change per offset...

I think we'll find out and can easily adjust.

> Bootstrapped on x86 and power10.  Regtested on rv64gcv_zvl512b
> (with and without implicit target support for zero-extension) and aarch64.

I like it!

OK.

Thanks (and sorry for the delay),
Richard.

> Regards
>  Robin
>
> gcc/ChangeLog:
>
>         * tree-vect-data-refs.cc (struct gather_scatter_config): New
>         struct to hold gather/scatter configurations.
>         (vect_gather_scatter_which_ifn): New function to determine which
>         IFN to use.
>         (vect_gather_scatter_get_configs): New function to enumerate all
>         target-supported configs.
>         (vect_gather_scatter_fn_p): Rework to use
>         vect_gather_scatter_get_configs and try sign-swapped offset.
>         (vect_check_gather_scatter): Use new supported offset vectype
>         argument.
>         * tree-vect-stmts.cc (check_load_store_for_partial_vectors):
>         Ditto.
>         (vect_truncate_gather_scatter_offset): Ditto.
>         (vect_use_grouped_gather): Ditto.
>         (get_load_store_type): Ditto.
>         (vectorizable_store): Convert to sign-swapped offset type if
>         needed.
>         (vectorizable_load): Ditto.
>         * tree-vectorizer.h (struct vect_load_store_data): Add
>         supported_offset_vectype.
>         (vect_gather_scatter_fn_p): Add argument.
> ---
>  gcc/tree-vect-data-refs.cc | 270 +++++++++++++++++++++++++++----------
>  gcc/tree-vect-stmts.cc     |  79 ++++++++++-
>  gcc/tree-vectorizer.h      |   6 +-
>  3 files changed, 280 insertions(+), 75 deletions(-)
>
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index c7941108887..fb2450a30c4 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -4425,6 +4425,143 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
>    return opt_result::success ();
>  }
>
> +/* Structure to hold information about a supported gather/scatter
> +   configuration.  */
> +struct gather_scatter_config
> +{
> +  internal_fn ifn;
> +  tree offset_vectype;
> +  vec<int> elsvals;
> +};
> +
> +/* Determine which gather/scatter IFN is supported for the given parameters.
> +   IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD
> +   are mutually exclusive, so we only need to find one.  Return the
> +   supported IFN or IFN_LAST if none are supported.  */
> +
> +static internal_fn
> +vect_gather_scatter_which_ifn (bool read_p, bool masked_p,
> +                              tree vectype, tree memory_type,
> +                              tree offset_vectype, int scale,
> +                              vec<int> *elsvals)
> +{
> +  /* Work out which functions to try.  */
> +  internal_fn ifn, alt_ifn, alt_ifn2;
> +  if (read_p)
> +    {
> +      ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> +      alt_ifn = IFN_MASK_GATHER_LOAD;
> +      alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
> +    }
> +  else
> +    {
> +      ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> +      alt_ifn = IFN_MASK_SCATTER_STORE;
> +      alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
> +    }
> +
> +  if (!offset_vectype)
> +    return IFN_LAST;
> +
> +  if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
> +                                             offset_vectype, scale, elsvals))
> +    return ifn;
> +  if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type,
> +                                             offset_vectype, scale, elsvals))
> +    return alt_ifn;
> +  if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type,
> +                                             offset_vectype, scale, elsvals))
> +    return alt_ifn2;
> +
> +  return IFN_LAST;
> +}
> +
> +/* Collect all supported offset vector types for a gather load or scatter
> +   store.  READ_P is true for loads and false for stores.  MASKED_P is true
> +   if the load or store is conditional.  VECTYPE is the data vector type.
> +   MEMORY_TYPE is the type of the memory elements being loaded or stored,
> +   and OFFSET_TYPE is the type of the offset.
> +   SCALE is the amount by which the offset should be multiplied.
> +
> +   Return a vector of all configurations the target supports (which can
> +   be none).  */
> +
> +static auto_vec<gather_scatter_config>
> +vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
> +                                tree vectype, tree memory_type,
> +                                tree offset_type, int scale)
> +{
> +  auto_vec<gather_scatter_config> configs;
> +
> +  auto_vec<tree, 8> offset_types_to_try;
> +
> +  /* Try all sizes from the offset type's precision up to POINTER_SIZE.  */
> +  for (unsigned int bits = TYPE_PRECISION (offset_type);
> +       bits <= POINTER_SIZE;
> +       bits *= 2)
> +    {
> +      /* Signed variant.  */
> +      offset_types_to_try.safe_push
> +       (build_nonstandard_integer_type (bits, 0));
> +      /* Unsigned variant.  */
> +      offset_types_to_try.safe_push
> +       (build_nonstandard_integer_type (bits, 1));
> +    }
> +
> +  /* Once we find which IFN works for one offset type, we know that it
> +     will work for other offset types as well.  Then we can perform
> +     the checks for the remaining offset types with only that IFN.
> +     However, we might need to try different offset types to find which
> +     IFN is supported, since the check is offset-type-specific.  */
> +  internal_fn ifn = IFN_LAST;
> +
> +  /* Try each offset type.  */
> +  for (unsigned int i = 0; i < offset_types_to_try.length (); i++)
> +    {
> +      tree offset_type = offset_types_to_try[i];
> +      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
> +      if (!offset_vectype)
> +       continue;
> +
> +      vec<int> elsvals = vNULL;
> +
> +      /* If we haven't determined which IFN is supported yet, try all three
> +        to find which one the target supports.  */
> +      if (ifn == IFN_LAST)
> +       {
> +         ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
> +                                              vectype, memory_type,
> +                                              offset_vectype, scale, &elsvals);
> +         if (ifn != IFN_LAST)
> +           {
> +             /* Found which IFN is supported.  Save this configuration.  */
> +             gather_scatter_config config;
> +             config.ifn = ifn;
> +             config.offset_vectype = offset_vectype;
> +             config.elsvals = elsvals;
> +             configs.safe_push (config);
> +           }
> +       }
> +      else
> +       {
> +         /* We already know which IFN is supported, just check if this
> +            offset type works with it.  */
> +         if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
> +                                                     offset_vectype, scale,
> +                                                     &elsvals))
> +           {
> +             gather_scatter_config config;
> +             config.ifn = ifn;
> +             config.offset_vectype = offset_vectype;
> +             config.elsvals = elsvals;
> +             configs.safe_push (config);
> +           }
> +       }
> +    }
> +
> +  return configs;
> +}
> +
>  /* Check whether we can use an internal function for a gather load
>     or scatter store.  READ_P is true for loads and false for stores.
>     MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
> @@ -4436,15 +4573,21 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
>
>     Return true if the function is supported, storing the function id in
>     *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
> +   If we support an offset vector type with different signedness than
> +   OFFSET_TYPE store it in SUPPORTED_OFFSET_VECTYPE.
>
> -   If we can use gather and store the possible else values in ELSVALS.  */
> +   If we can use gather/scatter and ELSVALS is nonzero, store the possible
> +   else values in ELSVALS.  */
>
>  bool
>  vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>                           tree vectype, tree memory_type, tree offset_type,
>                           int scale, internal_fn *ifn_out,
> -                         tree *offset_vectype_out, vec<int> *elsvals)
> +                         tree *offset_vectype_out,
> +                         tree *supported_offset_vectype,
> +                         vec<int> *elsvals)
>  {
> +  *supported_offset_vectype = NULL_TREE;
>    unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
>    unsigned int element_bits = vector_element_bits (vectype);
>    if (element_bits != memory_bits)
> @@ -4452,80 +4595,64 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>         memory elements.  */
>      return false;
>
> -  /* Work out which function we need.  */
> -  internal_fn ifn, alt_ifn, alt_ifn2;
> -  if (read_p)
> -    {
> -      ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> -      alt_ifn = IFN_MASK_GATHER_LOAD;
> -      /* When target supports MASK_LEN_GATHER_LOAD, we always
> -        use MASK_LEN_GATHER_LOAD regardless whether len and
> -        mask are valid or not.  */
> -      alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
> -    }
> -  else
> -    {
> -      ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> -      alt_ifn = IFN_MASK_SCATTER_STORE;
> -      /* When target supports MASK_LEN_SCATTER_STORE, we always
> -        use MASK_LEN_SCATTER_STORE regardless whether len and
> -        mask are valid or not.  */
> -      alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
> -    }
> +  /* Get the original offset vector type for comparison.  */
> +  tree offset_vectype = VECTOR_TYPE_P (offset_type)
> +    ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);
>
> -  for (;;)
> -    {
> -      tree offset_vectype;
> -      if (VECTOR_TYPE_P (offset_type))
> -       offset_vectype = offset_type;
> -      else
> -       {
> -         offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
> -         if (!offset_vectype)
> -           return false;
> -       }
> +  offset_type = TREE_TYPE (offset_vectype);
>
> -      /* Test whether the target supports this combination.  */
> -      if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
> -                                                 offset_vectype, scale,
> -                                                 elsvals))
> -       {
> -         *ifn_out = ifn;
> -         *offset_vectype_out = offset_vectype;
> -         return true;
> -       }
> -      else if (!masked_p
> -              && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
> -                                                         memory_type,
> -                                                         offset_vectype,
> -                                                         scale, elsvals))
> +  /* Get all supported configurations for this data vector type.  */
> +  auto_vec<gather_scatter_config> configs
> +    = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype,
> +                                      memory_type, offset_type, scale);
> +
> +  if (configs.is_empty ())
> +    return false;
> +
> +  /* First, try to find a configuration that matches our offset type
> +     (no conversion needed).  */
> +  for (unsigned int i = 0; i < configs.length (); i++)
> +    {
> +      if (TYPE_SIGN (configs[i].offset_vectype) == TYPE_SIGN (offset_vectype))
>         {
> -         *ifn_out = alt_ifn;
> -         *offset_vectype_out = offset_vectype;
> +         *ifn_out = configs[i].ifn;
> +         *offset_vectype_out = configs[i].offset_vectype;
> +         if (elsvals)
> +           *elsvals = configs[i].elsvals;
>           return true;
>         }
> -      else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
> -                                                      memory_type,
> -                                                      offset_vectype, scale,
> -                                                      elsvals))
> +    }
> +
> +  /* No direct match.  This means we try to find a sign-swapped offset
> +     vectype.  */
> +  unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
> +  unsigned int needed_precision
> +    = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
> +  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
> +
> +  enum tree_code tmp;
> +  for (unsigned int i = 0; i < configs.length (); i++)
> +    {
> +      unsigned int precision
> +       = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
> +      if (precision >= needed_precision
> +         && (supportable_convert_operation (CONVERT_EXPR,
> +                                            configs[i].offset_vectype,
> +                                            offset_vectype, &tmp)
> +             || (needed_precision == offset_precision
> +                 && tree_nop_conversion_p (configs[i].offset_vectype,
> +                                           offset_vectype))))
>         {
> -         *ifn_out = alt_ifn2;
> +         *ifn_out = configs[i].ifn;
>           *offset_vectype_out = offset_vectype;
> +         *supported_offset_vectype = configs[i].offset_vectype;
> +         if (elsvals)
> +           *elsvals = configs[i].elsvals;
>           return true;
>         }
> -
> -      /* For fixed offset vector type we're done.  */
> -      if (VECTOR_TYPE_P (offset_type))
> -       return false;
> -
> -      if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
> -         && TYPE_PRECISION (offset_type) >= element_bits)
> -       return false;
> -
> -      /* Try a larger offset vector type.  */
> -      offset_type = build_nonstandard_integer_type
> -       (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
>      }
> +
> +  return false;
>  }
>
>  /* STMT_INFO is a call to an internal gather load or scatter store function.
> @@ -4678,6 +4805,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>
>    base = fold_convert (sizetype, base);
>    base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
> +  tree tmp_offset_vectype;
>
>    /* OFF at this point may be either a SSA_NAME or some tree expression
>       from get_inner_reference.  Try to peel off loop invariants from it
> @@ -4752,12 +4880,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>                                                 signed_char_type_node,
>                                                 new_scale, &ifn,
>                                                 &offset_vectype,
> +                                               &tmp_offset_vectype,
>                                                 elsvals)
>                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                                 masked_p, vectype, memory_type,
>                                                 unsigned_char_type_node,
>                                                 new_scale, &ifn,
>                                                 &offset_vectype,
> +                                               &tmp_offset_vectype,
>                                                 elsvals))
>                 break;
>               scale = new_scale;
> @@ -4781,7 +4911,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>               && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                            masked_p, vectype, memory_type,
>                                            TREE_TYPE (off), scale, &ifn,
> -                                          &offset_vectype, elsvals))
> +                                          &offset_vectype,
> +                                          &tmp_offset_vectype,
> +                                          elsvals))
>             break;
>
>           if (TYPE_PRECISION (TREE_TYPE (op0))
> @@ -4835,7 +4967,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>      {
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                      vectype, memory_type, offtype, scale,
> -                                    &ifn, &offset_vectype, elsvals))
> +                                    &ifn, &offset_vectype,
> +                                    &tmp_offset_vectype,
> +                                    elsvals))
>         ifn = IFN_LAST;
>        decl = NULL_TREE;
>      }
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 83acbb3ff67..84ba756a042 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1505,6 +1505,14 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>                           : ls->strided_offset_vectype);
>        tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
>        int scale = SLP_TREE_GS_SCALE (slp_node);
> +
> +      /* The following "supported" checks just verify what we established in
> +        get_load_store_type and don't try different offset types.
> +        Therefore, off_vectype must be a supported offset type.  In case
> +        we chose a different one use this instead.  */
> +      if (ls->supported_offset_vectype)
> +       off_vectype = ls->supported_offset_vectype;
> +
>        if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
>                                                   memory_type,
>                                                   off_vectype, scale,
> @@ -1697,10 +1705,11 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
>        /* See whether the target supports the operation with an offset
>          no narrower than OFFSET_TYPE.  */
>        tree memory_type = TREE_TYPE (DR_REF (dr));
> +      tree tmp_offset_vectype;
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                      vectype, memory_type, offset_type, scale,
>                                      &gs_info->ifn, &gs_info->offset_vectype,
> -                                    elsvals)
> +                                    &tmp_offset_vectype, elsvals)
>           || gs_info->ifn == IFN_LAST)
>         continue;
>
> @@ -1779,10 +1788,11 @@ vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
>       type must exist) so it is possible that even though a gather/scatter is
>       not available we still have a strided load/store.  */
>    bool ok = false;
> +  tree tmp_vectype;
>    if (vect_gather_scatter_fn_p
>        (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
>         TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
> -       &offset_vectype, elsvals))
> +       &offset_vectype, &tmp_vectype, elsvals))
>      ok = true;
>    else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
>                                             elsvals))
> @@ -2080,6 +2090,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>    tree *ls_type = &ls->ls_type;
>    bool *slp_perm = &ls->slp_perm;
>    unsigned *n_perms = &ls->n_perms;
> +  tree *supported_offset_vectype = &ls->supported_offset_vectype;
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2152,12 +2163,25 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>        tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
>        tree tem;
>        if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
> -                                   masked_p, vectype,
> -                                   memory_type,
> +                                   masked_p, vectype, memory_type,
>                                     offset_vectype, scale,
>                                     &ls->gs.ifn, &tem,
> -                                   elsvals))
> -       *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +                                   supported_offset_vectype, elsvals))
> +       {
> +         if (dump_enabled_p ())
> +           {
> +             dump_printf_loc (MSG_NOTE, vect_location,
> +                              "gather/scatter with required "
> +                              "offset type "
> +                              "%T and offset scale %d.\n",
> +                              offset_vectype, scale);
> +             if (*supported_offset_vectype)
> +               dump_printf_loc (MSG_NOTE, vect_location,
> +                                " target supports offset type %T.\n",
> +                                *supported_offset_vectype);
> +           }
> +         *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +       }
>        else if (vls_type == VLS_LOAD
>                ? (targetm.vectorize.builtin_gather
>                   && (ls->gs.decl
> @@ -2421,6 +2445,19 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>                                                  masked_p, &gs_info, elsvals,
>                                                  group_size, single_element_p))
>         {
> +         /* vect_use_strided_gather_scatters_p does not save the actually
> +            supported scale and offset type so do that here.
> +            We need it later in check_load_store_for_partial_vectors
> +            where we only check if the given internal function is supported
> +            (to choose whether to use the IFN, LEGACY, or EMULATED flavor
> +            of gather/scatter) and don't re-do the full analysis.  */
> +         tree tmp;
> +         gcc_assert (vect_gather_scatter_fn_p
> +                     (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
> +                      gs_info.memory_type, TREE_TYPE (gs_info.offset),
> +                      gs_info.scale, &gs_info.ifn,
> +                      &tmp, supported_offset_vectype, elsvals));
> +
>           SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
>           SLP_TREE_GS_BASE (slp_node) = error_mark_node;
>           ls->gs.ifn = gs_info.ifn;
> @@ -8812,6 +8849,11 @@ vectorizable_store (vec_info *vinfo,
>             {
>               if (costing_p)
>                 {
> +                 if (ls.supported_offset_vectype)
> +                   inside_cost
> +                     += record_stmt_cost (cost_vec, 1, vector_stmt,
> +                                          slp_node, 0, vect_body);
> +
>                   unsigned int cnunits = vect_nunits_for_cost (vectype);
>                   inside_cost
>                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
> @@ -8823,6 +8865,16 @@ vectorizable_store (vec_info *vinfo,
>                 vec_offset = vec_offsets[j];
>
>               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
> +             bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
> +
> +             /* Perform the offset conversion if necessary.  */
> +             if (!strided && ls.supported_offset_vectype)
> +               {
> +                 gimple_seq stmts = NULL;
> +                 vec_offset = gimple_convert
> +                   (&stmts, ls.supported_offset_vectype, vec_offset);
> +                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +               }
>
>               if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
>                 {
> @@ -10638,6 +10690,11 @@ vectorizable_load (vec_info *vinfo,
>             {
>               if (costing_p)
>                 {
> +                 if (ls.supported_offset_vectype)
> +                   inside_cost
> +                     += record_stmt_cost (cost_vec, 1, vector_stmt,
> +                                          slp_node, 0, vect_body);
> +
>                   unsigned int cnunits = vect_nunits_for_cost (vectype);
>                   inside_cost
>                     = record_stmt_cost (cost_vec, cnunits, scalar_load,
> @@ -10648,6 +10705,16 @@ vectorizable_load (vec_info *vinfo,
>                 vec_offset = vec_offsets[i];
>               tree zero = build_zero_cst (vectype);
>               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
> +             bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
> +
> +             /* Perform the offset conversion if necessary.  */
> +             if (!strided && ls.supported_offset_vectype)
> +               {
> +                 gimple_seq stmts = NULL;
> +                 vec_offset = gimple_convert
> +                   (&stmts, ls.supported_offset_vectype, vec_offset);
> +                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +               }
>
>               if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
>                 {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 905a29142d3..a49fb9cb1ad 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -289,6 +289,10 @@ struct vect_load_store_data : vect_data {
>    } gs;
>    tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
>    tree ls_type; // VMAT_GATHER_SCATTER_IFN
> +  /* This is set to a supported offset vector type if we don't support the
> +     originally requested offset type.  In that case there will be an
> +     additional offset conversion before the gather/scatter.  */
> +  tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
>    auto_vec<int> elsvals;
>    /* True if the load requires a load permutation.  */
>    bool slp_perm;    // SLP_TREE_LOAD_PERMUTATION
> @@ -2593,7 +2597,7 @@ extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
>  extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
>  extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
>                                       tree, int, internal_fn *, tree *,
> -                                     vec<int> * = nullptr);
> +                                     tree *, vec<int> * = nullptr);
>  extern bool vect_check_gather_scatter (stmt_vec_info, tree,
>                                        loop_vec_info, gather_scatter_info *,
>                                        vec<int> * = nullptr);
> --
> 2.51.0
>
