On Tue, 19 Aug 2025, Richard Biener wrote:

> The following is a patch to make us record the get_load_store_type
> results from load/store analysis and re-use them during transform.
> In particular this moves where SLP_TREE_MEMORY_ACCESS_TYPE is stored.
> 
> A major hassle was (and still is, to some extent) gather/scatter
> handling with its accompanying gather_scatter_info.  Since
> get_load_store_type no longer fully re-analyzes them, and part of
> the information is already recorded in the SLP tree during SLP build,
> the following eliminates the use of the gather_scatter_info data in
> vectorizable_load/store and instead records the other relevant
> part in the load-store info (namely the IFN or decl chosen).
> Strided load handling keeps the re-analysis but populates the
> data back into the SLP tree and the load-store info.  That's
> something for further improvement.  This also shows that classifying
> an SLP tree as a load/store early and allocating the load-store data
> might be a way to move all of the gather/scatter auxiliary data
> back into one place.
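> 
> In vectorizable_store (and analogously in vectorizable_load) the
> analyze-once, re-use-at-transform scheme boils down to the following
> condensed sketch; it only rearranges lines from the patch below, with
> the comment on get_data reflecting my reading of its semantics:
> 
>   vect_load_store_data _ls_data;
>   /* At analysis time this yields the local instance, at transform
>      time the data remembered on the SLP node.  */
>   vect_load_store_data &ls = slp_node->get_data (_ls_data);
>   if (cost_vec  /* Analysis phase only.  */
>       && !get_load_store_type (vinfo, stmt_info, vectype, slp_node,
>                                mask_node, vls_type, &_ls_data))
>     return false;
>   ...
>   if (costing_p)
>     /* Remember the analysis result for the transform phase.  */
>     slp_node->data = new vect_load_store_data (std::move (ls));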
> 
> Rather than mass-replacing references to variables I've kept the
> locals but made them read-only, only adjusting a few elsvals setters
> and adding a FIXME to the strided SLP handling of alignment (allowing
> a local override there).
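> 
> Concretely, the transform code now reads the analysis results through
> const aliases, and the strided SLP FIXME shadows them within its own
> block (condensed from the patch below):
> 
>   /* Temporary aliases to analysis data, should not be modified through
>      these.  */
>   const vect_memory_access_type memory_access_type = ls.memory_access_type;
>   const dr_alignment_support alignment_support_scheme
>     = ls.alignment_support_scheme;
>   const int misalignment = ls.misalignment;
>   ...
>     {
>       /* ???  Re-do part of the alignment analysis locally, shadowing
>          the read-only aliases within this block only.  */
>       const dr_alignment_support tem = alignment_support_scheme;
>       dr_alignment_support alignment_support_scheme = tem;
>       const int tem2 = misalignment;
>       int misalignment = tem2;
>       ...
>     }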
> 
> The FIXME shows that while a lot of analysis is done in
> get_load_store_type, that's far from all of it.  There's also
> a possibility that splitting up the transform phase into
> separate load/store def types, based on the VMAT chosen, would make
> the code more maintainable.
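> 
> For illustration only (the helper names below are hypothetical and not
> part of this patch or of GCC), one way such a split could look is a
> dispatch on the recorded access type to per-VMAT transform routines:
> 
>   switch (SLP_TREE_MEMORY_ACCESS_TYPE (slp_node))
>     {
>     case VMAT_GATHER_SCATTER_IFN:
>     case VMAT_GATHER_SCATTER_LEGACY:
>       /* Hypothetical helper, not in this patch.  */
>       return vectorizable_load_gather_scatter (vinfo, stmt_info, gsi,
>                                                slp_node, cost_vec);
>     case VMAT_ELEMENTWISE:
>     case VMAT_STRIDED_SLP:
>       /* Hypothetical helper, not in this patch.  */
>       return vectorizable_load_strided (vinfo, stmt_info, gsi,
>                                         slp_node, cost_vec);
>     default:
>       /* Hypothetical helper, not in this patch.  */
>       return vectorizable_load_contiguous (vinfo, stmt_info, gsi,
>                                            slp_node, cost_vec);
>     }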

Bootstrapped and tested on x86_64-unknown-linux-gnu; the riscv CI
picked it up without issues, and Tamar tested on aarch64 (I also
tested a cross build for that).  I have pushed this series now.

Richard.

>       * tree-vectorizer.h (vect_load_store_data): New.
>       (_slp_tree::memory_access_type): Remove.
>       (SLP_TREE_MEMORY_ACCESS_TYPE): Turn into inline function.
>       * tree-vect-slp.cc (_slp_tree::_slp_tree): Do not
>       initialize SLP_TREE_MEMORY_ACCESS_TYPE.
>       * tree-vect-stmts.cc (check_load_store_for_partial_vectors):
>       Remove gather_scatter_info pointer argument, instead get
>       info from the SLP node.
>       (vect_build_one_gather_load_call): Get SLP node and builtin
>       decl as argument and remove uses of gather_scatter_info.
>       (vect_build_one_scatter_store_call): Likewise.
>       (vect_get_gather_scatter_ops): Remove uses of gather_scatter_info.
>       (vect_get_strided_load_store_ops): Get SLP node and remove
>       uses of gather_scatter_info.
>       (get_load_store_type): Take pointer to vect_load_store_data
>       instead of individual pointers.
>       (vectorizable_store): Adjust.  Re-use get_load_store_type
>       result from analysis time.
>       (vectorizable_load): Likewise.
> ---
>  gcc/tree-vect-slp.cc   |   1 -
>  gcc/tree-vect-stmts.cc | 285 ++++++++++++++++++++++-------------------
>  gcc/tree-vectorizer.h  |  34 ++++-
>  3 files changed, 182 insertions(+), 138 deletions(-)
> 
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 05363f89cf4..628d8a02ba9 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -126,7 +126,6 @@ _slp_tree::_slp_tree ()
>    this->avoid_stlf_fail = false;
>    SLP_TREE_VECTYPE (this) = NULL_TREE;
>    SLP_TREE_REPRESENTATIVE (this) = NULL;
> -  SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_UNINITIALIZED;
>    SLP_TREE_REF_COUNT (this) = 1;
>    this->failed = NULL;
>    this->max_nunits = 1;
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 675c6e2e683..ed6e5a11122 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1428,12 +1428,12 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>                                     slp_tree slp_node,
>                                     vec_load_store_type vls_type,
>                                     int group_size,
> -                                   vect_memory_access_type
> -                                   memory_access_type,
> -                                   const gather_scatter_info *gs_info,
> +                                   vect_load_store_data *ls,
>                                     slp_tree mask_node,
>                                     vec<int> *elsvals = nullptr)
>  {
> +  vect_memory_access_type memory_access_type = ls->memory_access_type;
> +
>    /* Invariant loads need no special support.  */
>    if (memory_access_type == VMAT_INVARIANT)
>      return;
> @@ -1492,16 +1492,20 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>        internal_fn len_ifn = (is_load
>                            ? IFN_MASK_LEN_GATHER_LOAD
>                            : IFN_MASK_LEN_SCATTER_STORE);
> +      stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node);
> +      tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr)
> +                       ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0])
> +                       : ls->strided_offset_vectype);
> +      tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
> +      int scale = SLP_TREE_GS_SCALE (slp_node);
>        if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
> -                                               gs_info->memory_type,
> -                                               gs_info->offset_vectype,
> -                                               gs_info->scale,
> +                                               memory_type,
> +                                               off_vectype, scale,
>                                                 elsvals))
>       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
>        else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
> -                                                    gs_info->memory_type,
> -                                                    gs_info->offset_vectype,
> -                                                    gs_info->scale,
> +                                                    memory_type,
> +                                                    off_vectype, scale,
>                                                      elsvals)
>              || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
>       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> @@ -1960,14 +1964,15 @@ static bool
>  get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>                    tree vectype, slp_tree slp_node,
>                    bool masked_p, vec_load_store_type vls_type,
> -                  vect_memory_access_type *memory_access_type,
> -                  poly_int64 *poffset,
> -                  dr_alignment_support *alignment_support_scheme,
> -                  int *misalignment,
> -                  gather_scatter_info *gs_info,
> -                  internal_fn *lanes_ifn,
> -                  vec<int> *elsvals = nullptr)
> +                  vect_load_store_data *ls)
>  {
> +  vect_memory_access_type *memory_access_type = &ls->memory_access_type;
> +  poly_int64 *poffset = &ls->poffset;
> +  dr_alignment_support *alignment_support_scheme
> +    = &ls->alignment_support_scheme;
> +  int *misalignment = &ls->misalignment;
> +  internal_fn *lanes_ifn = &ls->lanes_ifn;
> +  vec<int> *elsvals = &ls->elsvals;
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2025,34 +2030,29 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>      {
>        slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
>        tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
> -      memset (gs_info, 0, sizeof (gather_scatter_info));
> -      gs_info->offset_vectype = offset_vectype;
> -      gs_info->scale = SLP_TREE_GS_SCALE (slp_node);
> -      gs_info->base = SLP_TREE_GS_BASE (slp_node);
> -      gs_info->memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
> -      gs_info->decl = NULL_TREE;
> -      gs_info->ifn = IFN_LAST;
> +      int scale = SLP_TREE_GS_SCALE (slp_node);
> +      tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
>        tree tem;
>        if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
>                                   masked_p, vectype,
> -                                 gs_info->memory_type,
> -                                 offset_vectype, gs_info->scale,
> -                                 &gs_info->ifn, &tem,
> +                                 memory_type,
> +                                 offset_vectype, scale,
> +                                 &ls->gs.ifn, &tem,
>                                   elsvals))
>       *memory_access_type = VMAT_GATHER_SCATTER_IFN;
>        else if (vls_type == VLS_LOAD
>              ? (targetm.vectorize.builtin_gather
> -               && (gs_info->decl
> +               && (ls->gs.decl
>                       = targetm.vectorize.builtin_gather (vectype,
>                                                           TREE_TYPE
>                                                             (offset_vectype),
> -                                                         gs_info->scale)))
> +                                                         scale)))
>              : (targetm.vectorize.builtin_scatter
> -               && (gs_info->decl
> +               && (ls->gs.decl
>                       = targetm.vectorize.builtin_scatter (vectype,
>                                                            TREE_TYPE
>                                                              (offset_vectype),
> -                                                          gs_info->scale))))
> +                                                          scale))))
>       *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
>        else
>       {
> @@ -2317,11 +2317,20 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>         || *memory_access_type == VMAT_STRIDED_SLP)
>        && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
>        && SLP_TREE_LANES (slp_node) == 1
> -      && loop_vinfo
> -      && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
> -                                          masked_p, gs_info, elsvals,
> -                                          group_size, single_element_p))
> -    *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +      && loop_vinfo)
> +    {
> +      gather_scatter_info gs_info;
> +      if (vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
> +                                           masked_p, &gs_info, elsvals,
> +                                           group_size, single_element_p))
> +     {
> +       SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
> +       SLP_TREE_GS_BASE (slp_node) = error_mark_node;
> +       ls->gs.ifn = gs_info.ifn;
> +       ls->strided_offset_vectype = gs_info.offset_vectype;
> +       *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +     }
> +    }
>  
>    if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
>        || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> @@ -2715,13 +2724,12 @@ vect_get_mask_load_else (int elsval, tree type)
>  
>  static gimple *
>  vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
> -                              tree vectype,
> -                              gimple_stmt_iterator *gsi,
> -                              const gather_scatter_info *gs_info,
> +                              slp_tree slp_node, tree vectype,
> +                              gimple_stmt_iterator *gsi, tree decl,
>                                tree ptr, tree offset, tree mask)
>  {
> -  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
> -  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
> +  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
> +  tree rettype = TREE_TYPE (TREE_TYPE (decl));
>    tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>    /* ptrtype */ arglist = TREE_CHAIN (arglist);
>    tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> @@ -2787,8 +2795,8 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
>        mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
>      }
>  
> -  tree scale = build_int_cst (scaletype, gs_info->scale);
> -  gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> +  tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
> +  gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op,
>                                       mask_op, scale);
>  
>    if (!useless_type_conversion_p (vectype, rettype))
> @@ -2814,12 +2822,13 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
>  
>  static gimple *
>  vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
> +                                slp_tree slp_node,
>                                  gimple_stmt_iterator *gsi,
> -                                const gather_scatter_info *gs_info,
> +                                tree decl,
>                                  tree ptr, tree offset, tree oprnd, tree mask)
>  {
> -  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
> -  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
> +  tree rettype = TREE_TYPE (TREE_TYPE (decl));
> +  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
>    /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
>    tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>    tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> @@ -2883,9 +2892,9 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
>        op = var;
>      }
>  
> -  tree scale = build_int_cst (scaletype, gs_info->scale);
> +  tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
>    gcall *new_stmt
> -    = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
> +    = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale);
>    return new_stmt;
>  }
>  
> @@ -2897,11 +2906,11 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
>  
>  static void
>  vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
> -                          const gather_scatter_info *gs_info,
>                            tree *dataref_ptr, vec<tree> *vec_offset)
>  {
>    gimple_seq stmts = NULL;
> -  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
> +  *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node),
> +                                    &stmts, true, NULL_TREE);
>    if (stmts != NULL)
>      {
>        basic_block new_bb;
> @@ -2922,10 +2931,10 @@ vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
>     I * DR_STEP / SCALE.  */
>  
>  static void
> -vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
> +vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
> +                              tree vectype, tree offset_vectype,
>                                loop_vec_info loop_vinfo,
>                                gimple_stmt_iterator *gsi,
> -                              const gather_scatter_info *gs_info,
>                                tree *dataref_bump, tree *vec_offset,
>                                vec_loop_lens *loop_lens)
>  {
> @@ -2966,15 +2975,15 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
>  
>    /* The offset given in GS_INFO can have pointer type, so use the element
>       type of the vector instead.  */
> -  tree offset_type = TREE_TYPE (gs_info->offset_vectype);
> +  tree offset_type = TREE_TYPE (offset_vectype);
>  
>    /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
>    tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
> -                       ssize_int (gs_info->scale));
> +                       ssize_int (SLP_TREE_GS_SCALE (node)));
>    step = fold_convert (offset_type, step);
>  
>    /* Create {0, X, X*2, X*3, ...}.  */
> -  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
> +  tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype,
>                            build_zero_cst (offset_type), step);
>    *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
>  }
> @@ -7729,7 +7738,6 @@ vectorizable_store (vec_info *vinfo,
>    unsigned int vec_num;
>    bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
>    tree aggr_type;
> -  gather_scatter_info gs_info;
>    poly_uint64 vf;
>    vec_load_store_type vls_type;
>    tree ref_type;
> @@ -7814,16 +7822,20 @@ vectorizable_store (vec_info *vinfo,
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
>  
> -  vect_memory_access_type memory_access_type;
> -  enum dr_alignment_support alignment_support_scheme;
> -  int misalignment;
> -  poly_int64 poffset;
> -  internal_fn lanes_ifn;
> -  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> -                         vls_type, &memory_access_type, &poffset,
> -                         &alignment_support_scheme, &misalignment, &gs_info,
> -                         &lanes_ifn))
> +  vect_load_store_data _ls_data;
> +  vect_load_store_data &ls = slp_node->get_data (_ls_data);
> +  if (cost_vec
> +      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> +                            vls_type, &_ls_data))
>      return false;
> +  /* Temporary aliases to analysis data, should not be modified through
> +     these.  */
> +  const vect_memory_access_type memory_access_type = ls.memory_access_type;
> +  const dr_alignment_support alignment_support_scheme
> +    = ls.alignment_support_scheme;
> +  const int misalignment = ls.misalignment;
> +  const poly_int64 poffset = ls.poffset;
> +  const internal_fn lanes_ifn = ls.lanes_ifn;
>  
>    if (slp_node->ldst_lanes
>        && memory_access_type != VMAT_LOAD_STORE_LANES)
> @@ -7895,13 +7907,10 @@ vectorizable_store (vec_info *vinfo,
>    bool costing_p = cost_vec;
>    if (costing_p) /* transformation not required.  */
>      {
> -      SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
> -
>        if (loop_vinfo
>         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
>       check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> -                                           vls_type, group_size,
> -                                           memory_access_type, &gs_info,
> +                                           vls_type, group_size, &ls,
>                                             mask_node);
>  
>        if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
> @@ -7924,8 +7933,8 @@ vectorizable_store (vec_info *vinfo,
>                        "Vectorizing an unaligned access.\n");
>  
>        SLP_TREE_TYPE (slp_node) = store_vec_info_type;
> +      slp_node->data = new vect_load_store_data (std::move (ls));
>      }
> -  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
>  
>    /* Transform.  */
>  
> @@ -8020,6 +8029,14 @@ vectorizable_store (vec_info *vinfo,
>            ...
>           */
>  
> +      /* ???  Modify local copies of alignment_support_scheme and
> +      misalignment, but this part of analysis should be done
> +      earlier and remembered, likewise the chosen load mode.  */
> +      const dr_alignment_support tem = alignment_support_scheme;
> +      dr_alignment_support alignment_support_scheme = tem;
> +      const int tem2 = misalignment;
> +      int misalignment = tem2;
> +
>        unsigned nstores = const_nunits;
>        unsigned lnel = 1;
>        tree ltype = elem_type;
> @@ -8289,8 +8306,9 @@ vectorizable_store (vec_info *vinfo,
>      {
>        aggr_type = elem_type;
>        if (!costing_p)
> -     vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
> -                                      gsi, &gs_info,
> +     vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
> +                                      ls.strided_offset_vectype,
> +                                      loop_vinfo, gsi,
>                                        &bump, &vec_offset, loop_lens);
>      }
>    else
> @@ -8497,7 +8515,7 @@ vectorizable_store (vec_info *vinfo,
>                   vect_get_slp_defs (mask_node, &vec_masks);
>  
>                 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> -                 vect_get_gather_scatter_ops (loop, slp_node, &gs_info,
> +                 vect_get_gather_scatter_ops (loop, slp_node,
>                                                &dataref_ptr, &vec_offsets);
>                 else
>                   dataref_ptr
> @@ -8555,9 +8573,9 @@ vectorizable_store (vec_info *vinfo,
>             if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>               vec_offset = vec_offsets[j];
>  
> -           tree scale = size_int (gs_info.scale);
> +           tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>  
> -           if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
> +           if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
>               {
>                 if (loop_lens)
>                   final_len = vect_get_loop_len (loop_vinfo, gsi,
> @@ -8627,13 +8645,14 @@ vectorizable_store (vec_info *vinfo,
>                 continue;
>               }
>  
> +             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
>               poly_uint64 offset_nunits
> -               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
> +               = TYPE_VECTOR_SUBPARTS (offset_vectype);
>               if (known_eq (nunits, offset_nunits))
>                 {
>                   new_stmt = vect_build_one_scatter_store_call
> -                                (vinfo, stmt_info, gsi, &gs_info,
> -                                 dataref_ptr, vec_offsets[j],
> +                                (vinfo, stmt_info, slp_node, gsi,
> +                                 ls.gs.decl, dataref_ptr, vec_offsets[j],
>                                   vec_oprnd, final_mask);
>                   vect_finish_stmt_generation (vinfo, stmt_info,
>                                                new_stmt, gsi);
> @@ -8644,7 +8663,7 @@ vectorizable_store (vec_info *vinfo,
>                      lanes but the builtins will store full vectype
>                      data from the lower lanes.  */
>                   new_stmt = vect_build_one_scatter_store_call
> -                                (vinfo, stmt_info, gsi, &gs_info,
> +                                (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
>                                   dataref_ptr, vec_offsets[2 * j],
>                                   vec_oprnd, final_mask);
>                   vect_finish_stmt_generation (vinfo, stmt_info,
> @@ -8670,14 +8689,14 @@ vectorizable_store (vec_info *vinfo,
>                                                       VEC_UNPACK_HI_EXPR,
>                                                       final_mask);
>                       final_mask = make_ssa_name
> -                                   (truth_type_for (gs_info.offset_vectype));
> +                                   (truth_type_for (offset_vectype));
>                       gimple_set_lhs (new_stmt, final_mask);
>                       vect_finish_stmt_generation (vinfo, stmt_info,
>                                                    new_stmt, gsi);
>                       }
>  
>                   new_stmt = vect_build_one_scatter_store_call
> -                               (vinfo, stmt_info, gsi, &gs_info,
> +                               (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
>                                  dataref_ptr, vec_offsets[2 * j + 1],
>                                  vec_oprnd, final_mask);
>                   vect_finish_stmt_generation (vinfo, stmt_info,
> @@ -8710,8 +8729,8 @@ vectorizable_store (vec_info *vinfo,
>                     }
>  
>                   new_stmt = vect_build_one_scatter_store_call
> -                                (vinfo, stmt_info, gsi, &gs_info,
> -                                 dataref_ptr, vec_offset,
> +                                (vinfo, stmt_info, slp_node, gsi,
> +                                 ls.gs.decl, dataref_ptr, vec_offset,
>                                   vec_oprnd, final_mask);
>                   vect_finish_stmt_generation (vinfo, stmt_info,
>                                                new_stmt, gsi);
> @@ -8742,9 +8761,10 @@ vectorizable_store (vec_info *vinfo,
>                 continue;
>               }
>  
> +           tree offset_vectype = TREE_TYPE (vec_offsets[0]);
>             unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
>             unsigned HOST_WIDE_INT const_offset_nunits
> -             = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
> +             = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
>             vec<constructor_elt, va_gc> *ctor_elts;
>             vec_alloc (ctor_elts, const_nunits);
>             gimple_seq stmts = NULL;
> @@ -8759,7 +8779,7 @@ vectorizable_store (vec_info *vinfo,
>             unsigned elt_offset
>               = (j % factor) * const_nunits;
>             tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
> -           tree scale = size_int (gs_info.scale);
> +           tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>             tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
>             for (unsigned k = 0; k < const_nunits; ++k)
>               {
> @@ -9238,7 +9258,6 @@ vectorizable_load (vec_info *vinfo,
>    bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
>    poly_uint64 vf;
>    tree aggr_type;
> -  gather_scatter_info gs_info;
>    tree ref_type;
>    enum vect_def_type mask_dt = vect_unknown_def_type;
>    enum vect_def_type els_dt = vect_unknown_def_type;
> @@ -9372,20 +9391,24 @@ vectorizable_load (vec_info *vinfo,
>    else
>      group_size = 1;
>  
> -  vect_memory_access_type memory_access_type;
> -  enum dr_alignment_support alignment_support_scheme;
> -  int misalignment;
> -  poly_int64 poffset;
> -  internal_fn lanes_ifn;
> -  auto_vec<int> elsvals;
> -  int maskload_elsval = 0;
> -  bool need_zeroing = false;
> -  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> -                         VLS_LOAD, &memory_access_type, &poffset,
> -                         &alignment_support_scheme, &misalignment, &gs_info,
> -                         &lanes_ifn, &elsvals))
> +  vect_load_store_data _ls_data;
> +  vect_load_store_data &ls = slp_node->get_data (_ls_data);
> +  if (cost_vec
> +      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> +                            VLS_LOAD, &ls))
>      return false;
> +  /* Temporary aliases to analysis data, should not be modified through
> +     these.  */
> +  const vect_memory_access_type memory_access_type = ls.memory_access_type;
> +  const dr_alignment_support alignment_support_scheme
> +    = ls.alignment_support_scheme;
> +  const int misalignment = ls.misalignment;
> +  const poly_int64 poffset = ls.poffset;
> +  const internal_fn lanes_ifn = ls.lanes_ifn;
> +  const vec<int> &elsvals = ls.elsvals;
>  
> +  int maskload_elsval = 0;
> +  bool need_zeroing = false;
>  
>    /* We might need to explicitly zero inactive elements if there are
>       padding bits in the type that might leak otherwise.
> @@ -9456,7 +9479,7 @@ vectorizable_load (vec_info *vinfo,
>         if (!VECTOR_MODE_P (vec_mode)
>             || !can_vec_mask_load_store_p (vec_mode,
>                                            TYPE_MODE (mask_vectype),
> -                                          true, NULL, &elsvals))
> +                                          true, NULL, &ls.elsvals))
>           return false;
>       }
>        else if (memory_access_type != VMAT_LOAD_STORE_LANES
> @@ -9498,14 +9521,11 @@ vectorizable_load (vec_info *vinfo,
>         return false;
>       }
>  
> -      SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
> -
>        if (loop_vinfo
>         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
>       check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> -                                           VLS_LOAD, group_size,
> -                                           memory_access_type, &gs_info,
> -                                           mask_node, &elsvals);
> +                                           VLS_LOAD, group_size, &ls,
> +                                           mask_node, &ls.elsvals);
>  
>        if (dump_enabled_p ()
>         && memory_access_type != VMAT_ELEMENTWISE
> @@ -9520,16 +9540,7 @@ vectorizable_load (vec_info *vinfo,
>       vinfo->any_known_not_updated_vssa = true;
>  
>        SLP_TREE_TYPE (slp_node) = load_vec_info_type;
> -    }
> -  else
> -    {
> -      /* Here just get the else values.  */
> -      if (loop_vinfo
> -       && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> -     check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> -                                           VLS_LOAD, group_size,
> -                                           memory_access_type, &gs_info,
> -                                           mask_node, &elsvals);
> +      slp_node->data = new vect_load_store_data (std::move (ls));
>      }
>  
>    /* If the type needs padding we must zero inactive elements.
> @@ -9552,8 +9563,6 @@ vectorizable_load (vec_info *vinfo,
>    if (elsvals.length ())
>      maskload_elsval = *elsvals.begin ();
>  
> -  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
> -
>    if (dump_enabled_p () && !costing_p)
>      dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
>  
> @@ -9722,6 +9731,13 @@ vectorizable_load (vec_info *vinfo,
>        tree ltype = TREE_TYPE (vectype);
>        tree lvectype = vectype;
>        auto_vec<tree> dr_chain;
> +      /* ???  Modify local copies of alignment_support_scheme and
> +      misalignment, but this part of analysis should be done
> +      earlier and remembered, likewise the chosen load mode.  */
> +      const dr_alignment_support tem = alignment_support_scheme;
> +      dr_alignment_support alignment_support_scheme = tem;
> +      const int tem2 = misalignment;
> +      int misalignment = tem2;
>        if (memory_access_type == VMAT_STRIDED_SLP)
>       {
>         HOST_WIDE_INT n = gcd (group_size, const_nunits);
> @@ -10344,7 +10360,7 @@ vectorizable_load (vec_info *vinfo,
>         aggr_type = NULL_TREE;
>         bump = NULL_TREE;
>         if (!costing_p)
> -         vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr,
> +         vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr,
>                                        &vec_offsets);
>       }
>        else
> @@ -10352,8 +10368,9 @@ vectorizable_load (vec_info *vinfo,
>         aggr_type = elem_type;
>         if (!costing_p)
>           {
> -           vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
> -                                            gsi, &gs_info,
> +           vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
> +                                            ls.strided_offset_vectype,
> +                                            loop_vinfo, gsi,
>                                              &bump, &vec_offset, loop_lens);
>             dataref_ptr
>                 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
> @@ -10402,9 +10419,9 @@ vectorizable_load (vec_info *vinfo,
>             if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>               vec_offset = vec_offsets[i];
>             tree zero = build_zero_cst (vectype);
> -           tree scale = size_int (gs_info.scale);
> +           tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>  
> -           if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
> +           if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
>               {
>                 if (loop_lens)
>                   final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> @@ -10476,13 +10493,14 @@ vectorizable_load (vec_info *vinfo,
>                                       slp_node, 0, vect_body);
>                 continue;
>               }
> -           poly_uint64 offset_nunits
> -               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
> +           tree offset_vectype = TREE_TYPE (vec_offsets[0]);
> +           poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype);
>             if (known_eq (nunits, offset_nunits))
>               {
>                 new_stmt = vect_build_one_gather_load_call
> -                            (vinfo, stmt_info, vectype, gsi, &gs_info,
> -                             dataref_ptr, vec_offsets[i], final_mask);
> +                            (vinfo, stmt_info, slp_node, vectype, gsi,
> +                             ls.gs.decl, dataref_ptr, vec_offsets[i],
> +                             final_mask);
>                 data_ref = NULL_TREE;
>               }
>             else if (known_eq (nunits, offset_nunits * 2))
> @@ -10491,8 +10509,9 @@ vectorizable_load (vec_info *vinfo,
>                    lanes but the builtins will produce full vectype
>                    data with just the lower lanes filled.  */
>                 new_stmt = vect_build_one_gather_load_call
> -                            (vinfo, stmt_info, vectype, gsi, &gs_info,
> -                             dataref_ptr, vec_offsets[2 * i], final_mask);
> +                            (vinfo, stmt_info, slp_node, vectype, gsi,
> +                             ls.gs.decl, dataref_ptr, vec_offsets[2 * i],
> +                             final_mask);
>                 tree low = make_ssa_name (vectype);
>                 gimple_set_lhs (new_stmt, low);
>                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> @@ -10523,15 +10542,15 @@ vectorizable_load (vec_info *vinfo,
>                                                     VEC_UNPACK_HI_EXPR,
>                                                     final_mask);
>                     final_mask = make_ssa_name
> -                                 (truth_type_for (gs_info.offset_vectype));
> +                                 (truth_type_for (offset_vectype));
>                     gimple_set_lhs (new_stmt, final_mask);
>                     vect_finish_stmt_generation (vinfo, stmt_info,
>                                                  new_stmt, gsi);
>                   }
>  
>                 new_stmt = vect_build_one_gather_load_call
> -                            (vinfo, stmt_info, vectype, gsi, &gs_info,
> -                             dataref_ptr,
> +                            (vinfo, stmt_info, slp_node, vectype, gsi,
> +                             ls.gs.decl, dataref_ptr,
>                               vec_offsets[2 * i + 1], final_mask);
>                 tree high = make_ssa_name (vectype);
>                 gimple_set_lhs (new_stmt, high);
> @@ -10574,7 +10593,8 @@ vectorizable_load (vec_info *vinfo,
>                                                  new_stmt, gsi);
>                   }
>                 new_stmt = vect_build_one_gather_load_call
> -                            (vinfo, stmt_info, vectype, gsi, &gs_info,
> +                            (vinfo, stmt_info, slp_node, vectype, gsi,
> +                             ls.gs.decl,
>                               dataref_ptr, vec_offset, final_mask);
>                 data_ref = NULL_TREE;
>               }
> @@ -10603,8 +10623,9 @@ vectorizable_load (vec_info *vinfo,
>                                       slp_node, 0, vect_body);
>                 continue;
>               }
> +           tree offset_vectype = TREE_TYPE (vec_offsets[0]);
>             unsigned HOST_WIDE_INT const_offset_nunits
> -             = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype) .to_constant ();
> +             = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
>             vec<constructor_elt, va_gc> *ctor_elts;
>             vec_alloc (ctor_elts, const_nunits);
>             gimple_seq stmts = NULL;
> @@ -10615,7 +10636,7 @@ vectorizable_load (vec_info *vinfo,
>             vec_offset = vec_offsets[i / factor];
>             unsigned elt_offset = (i % factor) * const_nunits;
>             tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
> -           tree scale = size_int (gs_info.scale);
> +           tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>             tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
>             for (unsigned k = 0; k < const_nunits; ++k)
>               {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 3d8a9466982..b4229713b2c 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -270,6 +270,26 @@ struct vect_simd_clone_data : vect_data {
>    auto_vec<tree> simd_clone_info;
>  };
>  
> +/* Analysis data from vectorizable_load and vectorizable_store for
> +   load_vec_info_type and store_vec_info_type.  */
> +struct vect_load_store_data : vect_data {
> +  vect_load_store_data (vect_load_store_data &&other) = default;
> +  vect_load_store_data () = default;
> +  virtual ~vect_load_store_data () = default;
> +
> +  vect_memory_access_type memory_access_type;
> +  dr_alignment_support alignment_support_scheme;
> +  int misalignment;
> +  internal_fn lanes_ifn; // VMAT_LOAD_STORE_LANES
> +  poly_int64 poffset;
> +  union {
> +      internal_fn ifn;       // VMAT_GATHER_SCATTER_IFN
> +      tree decl;     // VMAT_GATHER_SCATTER_DECL
> +  } gs;
> +  tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
> +  auto_vec<int> elsvals;
> +};
> +
>  /* A computation tree of an SLP instance.  Each node corresponds to a group of
>     stmts to be packed in a SIMD stmt.  */
>  struct _slp_tree {
> @@ -332,10 +352,6 @@ struct _slp_tree {
>  
>    int vertex;
>  
> -  /* Classifies how the load or store is going to be implemented
> -     for loop vectorization.  */
> -  vect_memory_access_type memory_access_type;
> -
>    /* The kind of operation as determined by analysis and optional
>       kind specific data.  */
>    enum stmt_vec_info_type type;
> @@ -427,12 +443,20 @@ public:
>  #define SLP_TREE_REPRESENTATIVE(S)            (S)->representative
>  #define SLP_TREE_LANES(S)                     (S)->lanes
>  #define SLP_TREE_CODE(S)                      (S)->code
> -#define SLP_TREE_MEMORY_ACCESS_TYPE(S)                (S)->memory_access_type
>  #define SLP_TREE_TYPE(S)                      (S)->type
>  #define SLP_TREE_GS_SCALE(S)                  (S)->gs_scale
>  #define SLP_TREE_GS_BASE(S)                   (S)->gs_base
> +#define SLP_TREE_PERMUTE_P(S)                         ((S)->code == VEC_PERM_EXPR)
>  
> +inline vect_memory_access_type
> +SLP_TREE_MEMORY_ACCESS_TYPE (slp_tree node)
> +{
> +  if (SLP_TREE_TYPE (node) == load_vec_info_type
> +      || SLP_TREE_TYPE (node) == store_vec_info_type)
> +    return static_cast<vect_load_store_data *> (node->data)->memory_access_type;
> +  return VMAT_UNINITIALIZED;
> +}
> +
>  enum vect_partial_vector_style {
>      vect_partial_vectors_none,
>      vect_partial_vectors_while_ult,
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
