On Tue, 19 Aug 2025, Richard Biener wrote:

> The following is a patch to make us record the get_load_store_info
> results from load/store analysis and re-use them during transform.
> In particular this moves where SLP_TREE_MEMORY_ACCESS_TYPE is stored.
>
> A major hassle was (and still is, to some extent) gather/scatter
> handling with its accompanying gather_scatter_info.  Since
> get_load_store_info no longer fully re-analyzes them and part of
> the information is recorded in the SLP tree during SLP build, the
> following eliminates the use of this data in
> vectorizable_load/store, instead recording the other relevant
> part in the load-store info (namely the IFN or decl chosen).
> Strided load handling keeps the re-analysis but populates the
> data back to the SLP tree and the load-store info.  That's something
> for further improvement.  This also shows that classifying
> an SLP tree as load/store early and allocating the load-store data
> might be a way to move all of the gather/scatter auxiliary data
> back into one place.
>
> Rather than mass-replacing references to variables I've kept the
> locals but made them read-only, only adjusting a few elsval setters
> and adding a FIXME to strided SLP handling of alignment (allowing
> a local override there).
>
> The FIXME shows that while a lot of analysis is done in
> get_load_store_type, that's far from all of it.  There's also
> a possibility that splitting up the transform phase into
> separate load/store def types, based on the VMAT chosen, will make
> the code more maintainable.
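[Editor's note: to make the analysis-time/transform-time split easier to
follow, here is a minimal, self-contained C++ sketch of the pattern the
patch uses.  The names below (node_data, load_store_data, analyze) are
simplified stand-ins for vect_data, vect_load_store_data and
get_load_store_type; this is an illustration of the technique, not the
actual GCC code.]

  #include <cassert>
  #include <memory>

  enum memory_access_type { VMAT_UNINITIALIZED, VMAT_CONTIGUOUS };

  struct node_data { virtual ~node_data () = default; };

  /* Analysis results for a load/store node, remembered on the node
     once analysis succeeded (stand-in for vect_load_store_data).  */
  struct load_store_data : node_data {
    memory_access_type mat = VMAT_UNINITIALIZED;
    int misalignment = 0;
  };

  struct slp_node {
    std::unique_ptr<node_data> data;  /* set at the end of analysis */

    /* During analysis SCRATCH is used; during transform the copy
       remembered on the node is returned instead.  */
    load_store_data &get_data (load_store_data &scratch) {
      return data ? static_cast<load_store_data &> (*data) : scratch;
    }
  };

  /* Stand-in for get_load_store_type: decide the access type once.  */
  static bool analyze (load_store_data &ls) {
    ls.mat = VMAT_CONTIGUOUS;
    ls.misalignment = 0;
    return true;
  }

  static void vectorizable_load (slp_node &n, bool costing_p) {
    load_store_data scratch;
    load_store_data &ls = n.get_data (scratch);
    if (costing_p) {
      if (analyze (ls))
        /* Remember the analysis result for the transform phase.  */
        n.data = std::make_unique<load_store_data> (ls);
      return;
    }
    /* Transform phase: no re-analysis, just read the recorded data.  */
    assert (ls.mat != VMAT_UNINITIALIZED);
  }

  int main () {
    slp_node n;
    vectorizable_load (n, true);   /* analysis records the decision */
    vectorizable_load (n, false);  /* transform re-uses it */
  }

The real patch stores the blob with
slp_node->data = new vect_load_store_data (std::move (ls)); the sketch
mirrors that with a unique_ptr.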
Bootstrapped and tested on x86_64-unknown-linux-gnu, the riscv CI
picked it up without issues and Tamar tested on aarch64 (I also
tested cross for that).

I have pushed this series now.

Richard.

> 	* tree-vectorizer.h (vect_load_store_data): New.
> 	(_slp_tree::memory_access_type): Remove.
> 	(SLP_TREE_MEMORY_ACCESS_TYPE): Turn into inline function.
> 	* tree-vect-slp.cc (_slp_tree::_slp_tree): Do not
> 	initialize SLP_TREE_MEMORY_ACCESS_TYPE.
> 	* tree-vect-stmts.cc (check_load_store_for_partial_vectors):
> 	Remove gather_scatter_info pointer argument, instead get
> 	info from the SLP node.
> 	(vect_build_one_gather_load_call): Get SLP node and builtin
> 	decl as argument and remove uses of gather_scatter_info.
> 	(vect_build_one_scatter_store_call): Likewise.
> 	(vect_get_gather_scatter_ops): Remove uses of gather_scatter_info.
> 	(vect_get_strided_load_store_ops): Get SLP node and remove
> 	uses of gather_scatter_info.
> 	(get_load_store_type): Take pointer to vect_load_store_data
> 	instead of individual pointers.
> 	(vectorizable_store): Adjust.  Re-use get_load_store_type
> 	result from analysis time.
> 	(vectorizable_load): Likewise.
> ---
>  gcc/tree-vect-slp.cc   |   1 -
>  gcc/tree-vect-stmts.cc | 285 ++++++++++++++++++++++-------------------
>  gcc/tree-vectorizer.h  |  34 ++++-
>  3 files changed, 182 insertions(+), 138 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 05363f89cf4..628d8a02ba9 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -126,7 +126,6 @@ _slp_tree::_slp_tree ()
>    this->avoid_stlf_fail = false;
>    SLP_TREE_VECTYPE (this) = NULL_TREE;
>    SLP_TREE_REPRESENTATIVE (this) = NULL;
> -  SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_UNINITIALIZED;
>    SLP_TREE_REF_COUNT (this) = 1;
>    this->failed = NULL;
>    this->max_nunits = 1;
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 675c6e2e683..ed6e5a11122 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1428,12 +1428,12 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>                                        slp_tree slp_node,
>                                        vec_load_store_type vls_type,
>                                        int group_size,
> -                                      vect_memory_access_type
> -                                      memory_access_type,
> -                                      const gather_scatter_info *gs_info,
> +                                      vect_load_store_data *ls,
>                                        slp_tree mask_node,
>                                        vec<int> *elsvals = nullptr)
>  {
> +  vect_memory_access_type memory_access_type = ls->memory_access_type;
> +
>    /* Invariant loads need no special support.  */
>    if (memory_access_type == VMAT_INVARIANT)
>      return;
> @@ -1492,16 +1492,20 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>        internal_fn len_ifn = (is_load
>                               ? IFN_MASK_LEN_GATHER_LOAD
>                               : IFN_MASK_LEN_SCATTER_STORE);
> +      stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node);
> +      tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr)
> +                          ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0])
> +                          : ls->strided_offset_vectype);
> +      tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
> +      int scale = SLP_TREE_GS_SCALE (slp_node);
>        if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
> -                                                  gs_info->memory_type,
> -                                                  gs_info->offset_vectype,
> -                                                  gs_info->scale,
> +                                                  memory_type,
> +                                                  off_vectype, scale,
>                                                    elsvals))
>         vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
>        else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
> -                                                       gs_info->memory_type,
> -                                                       gs_info->offset_vectype,
> -                                                       gs_info->scale,
> +                                                       memory_type,
> +                                                       off_vectype, scale,
>                                                         elsvals)
>                || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
>         vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> @@ -1960,14 +1964,15 @@ static bool
>  get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>                       tree vectype, slp_tree slp_node,
>                       bool masked_p, vec_load_store_type vls_type,
> -                     vect_memory_access_type *memory_access_type,
> -                     poly_int64 *poffset,
> -                     dr_alignment_support *alignment_support_scheme,
> -                     int *misalignment,
> -                     gather_scatter_info *gs_info,
> -                     internal_fn *lanes_ifn,
> -                     vec<int> *elsvals = nullptr)
> +                     vect_load_store_data *ls)
>  {
> +  vect_memory_access_type *memory_access_type = &ls->memory_access_type;
> +  poly_int64 *poffset = &ls->poffset;
> +  dr_alignment_support *alignment_support_scheme
> +    = &ls->alignment_support_scheme;
> +  int *misalignment = &ls->misalignment;
> +  internal_fn *lanes_ifn = &ls->lanes_ifn;
> +  vec<int> *elsvals = &ls->elsvals;
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2025,34 +2030,29 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>      {
>        slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
>        tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
> -      memset (gs_info, 0, sizeof (gather_scatter_info));
> -      gs_info->offset_vectype = offset_vectype;
> -      gs_info->scale = SLP_TREE_GS_SCALE (slp_node);
> -      gs_info->base = SLP_TREE_GS_BASE (slp_node);
> -      gs_info->memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
> -      gs_info->decl = NULL_TREE;
> -      gs_info->ifn = IFN_LAST;
> +      int scale = SLP_TREE_GS_SCALE (slp_node);
> +      tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
>        tree tem;
>        if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
>                                      masked_p, vectype,
> -                                    gs_info->memory_type,
> -                                    offset_vectype, gs_info->scale,
> -                                    &gs_info->ifn, &tem,
> +                                    memory_type,
> +                                    offset_vectype, scale,
> +                                    &ls->gs.ifn, &tem,
>                                      elsvals))
>         *memory_access_type = VMAT_GATHER_SCATTER_IFN;
>        else if (vls_type == VLS_LOAD
>                 ? (targetm.vectorize.builtin_gather
> -                  && (gs_info->decl
> +                  && (ls->gs.decl
>                        = targetm.vectorize.builtin_gather (vectype,
>                                                            TREE_TYPE
>                                                              (offset_vectype),
> -                                                          gs_info->scale)))
> +                                                          scale)))
>                 : (targetm.vectorize.builtin_scatter
> -                  && (gs_info->decl
> +                  && (ls->gs.decl
>                        = targetm.vectorize.builtin_scatter (vectype,
>                                                             TREE_TYPE
>                                                               (offset_vectype),
> -                                                           gs_info->scale))))
> +                                                           scale))))
>         *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
>        else
>         {
> @@ -2317,11 +2317,20 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>           || *memory_access_type == VMAT_STRIDED_SLP)
>        && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
>        && SLP_TREE_LANES (slp_node) == 1
> -      && loop_vinfo
> -      && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
> -                                             masked_p, gs_info, elsvals,
> -                                             group_size, single_element_p))
> -    *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +      && loop_vinfo)
> +    {
> +      gather_scatter_info gs_info;
> +      if (vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
> +                                              masked_p, &gs_info, elsvals,
> +                                              group_size, single_element_p))
> +       {
> +         SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
> +         SLP_TREE_GS_BASE (slp_node) = error_mark_node;
> +         ls->gs.ifn = gs_info.ifn;
> +         ls->strided_offset_vectype = gs_info.offset_vectype;
> +         *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +       }
> +    }
>
>    if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
>        || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> @@ -2715,13 +2724,12 @@ vect_get_mask_load_else (int elsval, tree type)
>
>  static gimple *
>  vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
> -                                tree vectype,
> -                                gimple_stmt_iterator *gsi,
> -                                const gather_scatter_info *gs_info,
> +                                slp_tree slp_node, tree vectype,
> +                                gimple_stmt_iterator *gsi, tree decl,
>                                  tree ptr, tree offset, tree mask)
>  {
> -  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
> -  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
> +  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
> +  tree rettype = TREE_TYPE (TREE_TYPE (decl));
>    tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>    /* ptrtype */ arglist = TREE_CHAIN (arglist);
>    tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> @@ -2787,8 +2795,8 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
>        mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
>      }
>
> -  tree scale = build_int_cst (scaletype, gs_info->scale);
> -  gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> +  tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
> +  gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op,
>                                          mask_op, scale);
>
>    if (!useless_type_conversion_p (vectype, rettype))
> @@ -2814,12 +2822,13 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
>
>  static gimple *
>  vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
> +                                  slp_tree slp_node,
>                                    gimple_stmt_iterator *gsi,
> -                                  const gather_scatter_info *gs_info,
> +                                  tree decl,
>                                    tree ptr, tree offset, tree oprnd, tree mask)
>  {
> -  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
> -  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
> +  tree rettype = TREE_TYPE (TREE_TYPE (decl));
> +  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
>    /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
>    tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
>    tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> @@ -2883,9 +2892,9 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
>        op = var;
>      }
>
> -  tree scale = build_int_cst (scaletype, gs_info->scale);
> +  tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
>    gcall *new_stmt
> -    = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
> +    = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale);
>    return new_stmt;
>  }
>
> @@ -2897,11 +2906,11 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
>
>  static void
>  vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
> -                             const gather_scatter_info *gs_info,
>                               tree *dataref_ptr, vec<tree> *vec_offset)
>  {
>    gimple_seq stmts = NULL;
> -  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
> +  *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node),
> +                                       &stmts, true, NULL_TREE);
>    if (stmts != NULL)
>      {
>        basic_block new_bb;
> @@ -2922,10 +2931,10 @@ vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
>     I * DR_STEP / SCALE.  */
>
>  static void
> -vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
> +vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
> +                                tree vectype, tree offset_vectype,
>                                  loop_vec_info loop_vinfo,
>                                  gimple_stmt_iterator *gsi,
> -                                const gather_scatter_info *gs_info,
>                                  tree *dataref_bump, tree *vec_offset,
>                                  vec_loop_lens *loop_lens)
>  {
> @@ -2966,15 +2975,15 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
>
>    /* The offset given in GS_INFO can have pointer type, so use the element
>       type of the vector instead.  */
> -  tree offset_type = TREE_TYPE (gs_info->offset_vectype);
> +  tree offset_type = TREE_TYPE (offset_vectype);
>
>    /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
>    tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
> -                          ssize_int (gs_info->scale));
> +                          ssize_int (SLP_TREE_GS_SCALE (node)));
>    step = fold_convert (offset_type, step);
>
>    /* Create {0, X, X*2, X*3, ...}.  */
> -  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
> +  tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype,
>                               build_zero_cst (offset_type), step);
>    *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
>  }
> @@ -7729,7 +7738,6 @@ vectorizable_store (vec_info *vinfo,
>    unsigned int vec_num;
>    bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
>    tree aggr_type;
> -  gather_scatter_info gs_info;
>    poly_uint64 vf;
>    vec_load_store_type vls_type;
>    tree ref_type;
> @@ -7814,16 +7822,20 @@ vectorizable_store (vec_info *vinfo,
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
>
> -  vect_memory_access_type memory_access_type;
> -  enum dr_alignment_support alignment_support_scheme;
> -  int misalignment;
> -  poly_int64 poffset;
> -  internal_fn lanes_ifn;
> -  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> -                            vls_type, &memory_access_type, &poffset,
> -                            &alignment_support_scheme, &misalignment, &gs_info,
> -                            &lanes_ifn))
> +  vect_load_store_data _ls_data;
> +  vect_load_store_data &ls = slp_node->get_data (_ls_data);
> +  if (cost_vec
> +      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> +                               vls_type, &_ls_data))
>      return false;
> +  /* Temporary aliases to analysis data, should not be modified through
> +     these.  */
> +  const vect_memory_access_type memory_access_type = ls.memory_access_type;
> +  const dr_alignment_support alignment_support_scheme
> +    = ls.alignment_support_scheme;
> +  const int misalignment = ls.misalignment;
> +  const poly_int64 poffset = ls.poffset;
> +  const internal_fn lanes_ifn = ls.lanes_ifn;
>
>    if (slp_node->ldst_lanes
>        && memory_access_type != VMAT_LOAD_STORE_LANES)
> @@ -7895,13 +7907,10 @@ vectorizable_store (vec_info *vinfo,
>    bool costing_p = cost_vec;
>    if (costing_p) /* transformation not required.  */
>      {
> -      SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
> -
>        if (loop_vinfo
>           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
>         check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> -                                             vls_type, group_size,
> -                                             memory_access_type, &gs_info,
> +                                             vls_type, group_size, &ls,
>                                               mask_node);
>
>        if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
> @@ -7924,8 +7933,8 @@ vectorizable_store (vec_info *vinfo,
>                                  "Vectorizing an unaligned access.\n");
>
>        SLP_TREE_TYPE (slp_node) = store_vec_info_type;
> +      slp_node->data = new vect_load_store_data (std::move (ls));
>      }
> -  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
>
>    /* Transform.  */
>
> @@ -8020,6 +8029,14 @@ vectorizable_store (vec_info *vinfo,
>          ...
>        */
>
> +      /* ??? Modify local copies of alignment_support_scheme and
> +        misalignment, but this part of analysis should be done
> +        earlier and remembered, likewise the chosen load mode.  */
> +      const dr_alignment_support tem = alignment_support_scheme;
> +      dr_alignment_support alignment_support_scheme = tem;
> +      const int tem2 = misalignment;
> +      int misalignment = tem2;
> +
>        unsigned nstores = const_nunits;
>        unsigned lnel = 1;
>        tree ltype = elem_type;
> @@ -8289,8 +8306,9 @@ vectorizable_store (vec_info *vinfo,
>      {
>        aggr_type = elem_type;
>        if (!costing_p)
> -       vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
> -                                        gsi, &gs_info,
> +       vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
> +                                        ls.strided_offset_vectype,
> +                                        loop_vinfo, gsi,
>                                          &bump, &vec_offset, loop_lens);
>      }
>    else
> @@ -8497,7 +8515,7 @@ vectorizable_store (vec_info *vinfo,
>             vect_get_slp_defs (mask_node, &vec_masks);
>
>           if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> -           vect_get_gather_scatter_ops (loop, slp_node, &gs_info,
> +           vect_get_gather_scatter_ops (loop, slp_node,
>                                          &dataref_ptr, &vec_offsets);
>           else
>             dataref_ptr
> @@ -8555,9 +8573,9 @@ vectorizable_store (vec_info *vinfo,
>               if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>                 vec_offset = vec_offsets[j];
>
> -             tree scale = size_int (gs_info.scale);
> +             tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>
> -             if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
> +             if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
>                 {
>                   if (loop_lens)
>                     final_len = vect_get_loop_len (loop_vinfo, gsi,
> @@ -8627,13 +8645,14 @@ vectorizable_store (vec_info *vinfo,
>                   continue;
>                 }
>
> +             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
>               poly_uint64 offset_nunits
> -               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
> +               = TYPE_VECTOR_SUBPARTS (offset_vectype);
>               if (known_eq (nunits, offset_nunits))
>                 {
>                   new_stmt = vect_build_one_scatter_store_call
> -                              (vinfo, stmt_info, gsi, &gs_info,
> -                               dataref_ptr, vec_offsets[j],
> +                              (vinfo, stmt_info, slp_node, gsi,
> +                               ls.gs.decl, dataref_ptr, vec_offsets[j],
>                                 vec_oprnd, final_mask);
>                   vect_finish_stmt_generation (vinfo, stmt_info,
>                                                new_stmt, gsi);
> @@ -8644,7 +8663,7 @@ vectorizable_store (vec_info *vinfo,
>                      lanes but the builtins will store full vectype
>                      data from the lower lanes.  */
>                   new_stmt = vect_build_one_scatter_store_call
> -                              (vinfo, stmt_info, gsi, &gs_info,
> +                              (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
>                                 dataref_ptr, vec_offsets[2 * j],
>                                 vec_oprnd, final_mask);
>                   vect_finish_stmt_generation (vinfo, stmt_info,
> @@ -8670,14 +8689,14 @@ vectorizable_store (vec_info *vinfo,
>                                                     VEC_UNPACK_HI_EXPR,
>                                                     final_mask);
>                       final_mask = make_ssa_name
> -                                    (truth_type_for (gs_info.offset_vectype));
> +                                    (truth_type_for (offset_vectype));
>                       gimple_set_lhs (new_stmt, final_mask);
>                       vect_finish_stmt_generation (vinfo, stmt_info,
>                                                    new_stmt, gsi);
>                     }
>
>                   new_stmt = vect_build_one_scatter_store_call
> -                              (vinfo, stmt_info, gsi, &gs_info,
> +                              (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
>                                 dataref_ptr, vec_offsets[2 * j + 1],
>                                 vec_oprnd, final_mask);
>                   vect_finish_stmt_generation (vinfo, stmt_info,
> @@ -8710,8 +8729,8 @@ vectorizable_store (vec_info *vinfo,
>                 }
>
>               new_stmt = vect_build_one_scatter_store_call
> -                          (vinfo, stmt_info, gsi, &gs_info,
> -                           dataref_ptr, vec_offset,
> +                          (vinfo, stmt_info, slp_node, gsi,
> +                           ls.gs.decl, dataref_ptr, vec_offset,
>                             vec_oprnd, final_mask);
>               vect_finish_stmt_generation (vinfo, stmt_info,
>                                            new_stmt, gsi);
> @@ -8742,9 +8761,10 @@ vectorizable_store (vec_info *vinfo,
>               continue;
>             }
>
> +         tree offset_vectype = TREE_TYPE (vec_offsets[0]);
>           unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
>           unsigned HOST_WIDE_INT const_offset_nunits
> -           = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
> +           = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
>           vec<constructor_elt, va_gc> *ctor_elts;
>           vec_alloc (ctor_elts, const_nunits);
>           gimple_seq stmts = NULL;
> @@ -8759,7 +8779,7 @@ vectorizable_store (vec_info *vinfo,
>           unsigned elt_offset
>             = (j % factor) * const_nunits;
>           tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
> -         tree scale = size_int (gs_info.scale);
> +         tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>           tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
>           for (unsigned k = 0; k < const_nunits; ++k)
>             {
> @@ -9238,7 +9258,6 @@ vectorizable_load (vec_info *vinfo,
>    bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
>    poly_uint64 vf;
>    tree aggr_type;
> -  gather_scatter_info gs_info;
>    tree ref_type;
>    enum vect_def_type mask_dt = vect_unknown_def_type;
>    enum vect_def_type els_dt = vect_unknown_def_type;
> @@ -9372,20 +9391,24 @@ vectorizable_load (vec_info *vinfo,
>    else
>      group_size = 1;
>
> -  vect_memory_access_type memory_access_type;
> -  enum dr_alignment_support alignment_support_scheme;
> -  int misalignment;
> -  poly_int64 poffset;
> -  internal_fn lanes_ifn;
> -  auto_vec<int> elsvals;
> -  int maskload_elsval = 0;
> -  bool need_zeroing = false;
> -  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> -                            VLS_LOAD, &memory_access_type, &poffset,
> -                            &alignment_support_scheme, &misalignment, &gs_info,
> -                            &lanes_ifn, &elsvals))
> +  vect_load_store_data _ls_data;
> +  vect_load_store_data &ls = slp_node->get_data (_ls_data);
> +  if (cost_vec
> +      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
> +                               VLS_LOAD, &ls))
>      return false;
> +  /* Temporary aliases to analysis data, should not be modified through
> +     these.  */
> +  const vect_memory_access_type memory_access_type = ls.memory_access_type;
> +  const dr_alignment_support alignment_support_scheme
> +    = ls.alignment_support_scheme;
> +  const int misalignment = ls.misalignment;
> +  const poly_int64 poffset = ls.poffset;
> +  const internal_fn lanes_ifn = ls.lanes_ifn;
> +  const vec<int> &elsvals = ls.elsvals;
>
> +  int maskload_elsval = 0;
> +  bool need_zeroing = false;
>
>    /* We might need to explicitly zero inactive elements if there are
>       padding bits in the type that might leak otherwise.
> @@ -9456,7 +9479,7 @@ vectorizable_load (vec_info *vinfo,
>           if (!VECTOR_MODE_P (vec_mode)
>               || !can_vec_mask_load_store_p (vec_mode,
>                                              TYPE_MODE (mask_vectype),
> -                                            true, NULL, &elsvals))
> +                                            true, NULL, &ls.elsvals))
>             return false;
>         }
>        else if (memory_access_type != VMAT_LOAD_STORE_LANES
> @@ -9498,14 +9521,11 @@ vectorizable_load (vec_info *vinfo,
>           return false;
>         }
>
> -      SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
> -
>        if (loop_vinfo
>           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
>         check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> -                                             VLS_LOAD, group_size,
> -                                             memory_access_type, &gs_info,
> -                                             mask_node, &elsvals);
> +                                             VLS_LOAD, group_size, &ls,
> +                                             mask_node, &ls.elsvals);
>
>        if (dump_enabled_p ()
>           && memory_access_type != VMAT_ELEMENTWISE
> @@ -9520,16 +9540,7 @@ vectorizable_load (vec_info *vinfo,
>         vinfo->any_known_not_updated_vssa = true;
>
>        SLP_TREE_TYPE (slp_node) = load_vec_info_type;
> -    }
> -  else
> -    {
> -      /* Here just get the else values.  */
> -      if (loop_vinfo
> -         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> -       check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> -                                             VLS_LOAD, group_size,
> -                                             memory_access_type, &gs_info,
> -                                             mask_node, &elsvals);
> +      slp_node->data = new vect_load_store_data (std::move (ls));
>      }
>
>    /* If the type needs padding we must zero inactive elements.
> @@ -9552,8 +9563,6 @@ vectorizable_load (vec_info *vinfo,
>    if (elsvals.length ())
>      maskload_elsval = *elsvals.begin ();
>
> -  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
> -
>    if (dump_enabled_p () && !costing_p)
>      dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
>
> @@ -9722,6 +9731,13 @@ vectorizable_load (vec_info *vinfo,
>        tree ltype = TREE_TYPE (vectype);
>        tree lvectype = vectype;
>        auto_vec<tree> dr_chain;
> +      /* ??? Modify local copies of alignment_support_scheme and
> +        misalignment, but this part of analysis should be done
> +        earlier and remembered, likewise the chosen load mode.  */
> +      const dr_alignment_support tem = alignment_support_scheme;
> +      dr_alignment_support alignment_support_scheme = tem;
> +      const int tem2 = misalignment;
> +      int misalignment = tem2;
>        if (memory_access_type == VMAT_STRIDED_SLP)
>         {
>           HOST_WIDE_INT n = gcd (group_size, const_nunits);
> @@ -10344,7 +10360,7 @@ vectorizable_load (vec_info *vinfo,
>        aggr_type = NULL_TREE;
>        bump = NULL_TREE;
>        if (!costing_p)
> -       vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr,
> +       vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr,
>                                      &vec_offsets);
>      }
>    else
> @@ -10352,8 +10368,9 @@ vectorizable_load (vec_info *vinfo,
>        aggr_type = elem_type;
>        if (!costing_p)
>         {
> -         vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
> -                                          gsi, &gs_info,
> +         vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
> +                                          ls.strided_offset_vectype,
> +                                          loop_vinfo, gsi,
>                                            &bump, &vec_offset, loop_lens);
>           dataref_ptr
>             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
> @@ -10402,9 +10419,9 @@ vectorizable_load (vec_info *vinfo,
>               if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>                 vec_offset = vec_offsets[i];
>               tree zero = build_zero_cst (vectype);
> -             tree scale = size_int (gs_info.scale);
> +             tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>
> -             if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
> +             if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
>                 {
>                   if (loop_lens)
>                     final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> @@ -10476,13 +10493,14 @@ vectorizable_load (vec_info *vinfo,
>                                           slp_node, 0, vect_body);
>                   continue;
>                 }
> -             poly_uint64 offset_nunits
> -               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
> +             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
> +             poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype);
>               if (known_eq (nunits, offset_nunits))
>                 {
>                   new_stmt = vect_build_one_gather_load_call
> -                              (vinfo, stmt_info, vectype, gsi, &gs_info,
> -                               dataref_ptr, vec_offsets[i], final_mask);
> +                              (vinfo, stmt_info, slp_node, vectype, gsi,
> +                               ls.gs.decl, dataref_ptr, vec_offsets[i],
> +                               final_mask);
>                   data_ref = NULL_TREE;
>                 }
>               else if (known_eq (nunits, offset_nunits * 2))
> @@ -10491,8 +10509,9 @@ vectorizable_load (vec_info *vinfo,
>                      lanes but the builtins will produce full vectype
>                      data with just the lower lanes filled.  */
>                   new_stmt = vect_build_one_gather_load_call
> -                              (vinfo, stmt_info, vectype, gsi, &gs_info,
> -                               dataref_ptr, vec_offsets[2 * i], final_mask);
> +                              (vinfo, stmt_info, slp_node, vectype, gsi,
> +                               ls.gs.decl, dataref_ptr, vec_offsets[2 * i],
> +                               final_mask);
>                   tree low = make_ssa_name (vectype);
>                   gimple_set_lhs (new_stmt, low);
>                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> @@ -10523,15 +10542,15 @@ vectorizable_load (vec_info *vinfo,
>                                                     VEC_UNPACK_HI_EXPR,
>                                                     final_mask);
>                       final_mask = make_ssa_name
> -                                    (truth_type_for (gs_info.offset_vectype));
> +                                    (truth_type_for (offset_vectype));
>                       gimple_set_lhs (new_stmt, final_mask);
>                       vect_finish_stmt_generation (vinfo, stmt_info,
>                                                    new_stmt, gsi);
>                     }
>
>                   new_stmt = vect_build_one_gather_load_call
> -                              (vinfo, stmt_info, vectype, gsi, &gs_info,
> -                               dataref_ptr,
> +                              (vinfo, stmt_info, slp_node, vectype, gsi,
> +                               ls.gs.decl, dataref_ptr,
>                                 vec_offsets[2 * i + 1], final_mask);
>                   tree high = make_ssa_name (vectype);
>                   gimple_set_lhs (new_stmt, high);
> @@ -10574,7 +10593,8 @@ vectorizable_load (vec_info *vinfo,
>                                                    new_stmt, gsi);
>                 }
>               new_stmt = vect_build_one_gather_load_call
> -                          (vinfo, stmt_info, vectype, gsi, &gs_info,
> +                          (vinfo, stmt_info, slp_node, vectype, gsi,
> +                           ls.gs.decl,
>                             dataref_ptr, vec_offset, final_mask);
>               data_ref = NULL_TREE;
>             }
> @@ -10603,8 +10623,9 @@ vectorizable_load (vec_info *vinfo,
>                                           slp_node, 0, vect_body);
>                   continue;
>                 }
> +             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
>               unsigned HOST_WIDE_INT const_offset_nunits
> -               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
> +               = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
>               vec<constructor_elt, va_gc> *ctor_elts;
>               vec_alloc (ctor_elts, const_nunits);
>               gimple_seq stmts = NULL;
> @@ -10615,7 +10636,7 @@ vectorizable_load (vec_info *vinfo,
>                   vec_offset = vec_offsets[i / factor];
>                   unsigned elt_offset = (i % factor) * const_nunits;
>                   tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
> -                 tree scale = size_int (gs_info.scale);
> +                 tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
>                   tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
>                   for (unsigned k = 0; k < const_nunits; ++k)
>                     {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 3d8a9466982..b4229713b2c 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -270,6 +270,26 @@ struct vect_simd_clone_data : vect_data {
>    auto_vec<tree> simd_clone_info;
>  };
>
> +/* Analysis data from vectorizable_load and vectorizable_store for
> +   load_vec_info_type and store_vec_info_type.  */
> +struct vect_load_store_data : vect_data {
> +  vect_load_store_data (vect_load_store_data &&other) = default;
> +  vect_load_store_data () = default;
> +  virtual ~vect_load_store_data () = default;
> +
> +  vect_memory_access_type memory_access_type;
> +  dr_alignment_support alignment_support_scheme;
> +  int misalignment;
> +  internal_fn lanes_ifn; // VMAT_LOAD_STORE_LANES
> +  poly_int64 poffset;
> +  union {
> +    internal_fn ifn; // VMAT_GATHER_SCATTER_IFN
> +    tree decl; // VMAT_GATHER_SCATTER_DECL
> +  } gs;
> +  tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
> +  auto_vec<int> elsvals;
> +};
> +
>  /* A computation tree of an SLP instance.  Each node corresponds to a group of
>     stmts to be packed in a SIMD stmt.  */
>  struct _slp_tree {
> @@ -332,10 +352,6 @@ struct _slp_tree {
>
>    int vertex;
>
> -  /* Classifies how the load or store is going to be implemented
> -     for loop vectorization.  */
> -  vect_memory_access_type memory_access_type;
> -
>    /* The kind of operation as determined by analysis and optional
>       kind specific data.  */
>    enum stmt_vec_info_type type;
> @@ -427,12 +443,20 @@ public:
>  #define SLP_TREE_REPRESENTATIVE(S)              (S)->representative
>  #define SLP_TREE_LANES(S)                       (S)->lanes
>  #define SLP_TREE_CODE(S)                        (S)->code
> -#define SLP_TREE_MEMORY_ACCESS_TYPE(S)          (S)->memory_access_type
>  #define SLP_TREE_TYPE(S)                        (S)->type
>  #define SLP_TREE_GS_SCALE(S)                    (S)->gs_scale
>  #define SLP_TREE_GS_BASE(S)                     (S)->gs_base
>  #define SLP_TREE_PERMUTE_P(S)                   ((S)->code == VEC_PERM_EXPR)
>
> +inline vect_memory_access_type
> +SLP_TREE_MEMORY_ACCESS_TYPE (slp_tree node)
> +{
> +  if (SLP_TREE_TYPE (node) == load_vec_info_type
> +      || SLP_TREE_TYPE (node) == store_vec_info_type)
> +    return static_cast<vect_load_store_data *> (node->data)->memory_access_type;
> +  return VMAT_UNINITIALIZED;
> +}
> +
>  enum vect_partial_vector_style {
>    vect_partial_vectors_none,
>    vect_partial_vectors_while_ult,

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)