On Sun, 11 Aug 2024, Robin Dapp wrote: > This patch adds an else operand to vectorized masked load calls. > The current implementation adds else-value arguments to the respective > target-querying functions that is used to supply the vectorizer with the > proper else value. > > Right now, the only spot where a zero else value is actually enforced is > tree-ifcvt. Loop masking and other instances of masked loads in the > vectorizer itself do not use vec_cond_exprs. > > gcc/ChangeLog: > > * internal-fn.cc (internal_gather_scatter_fn_supported_p): Add > else argument. > * internal-fn.h (internal_gather_scatter_fn_supported_p): Ditto. > (MASK_LOAD_ELSE_NONE): Define. > (MASK_LOAD_ELSE_ZERO): Ditto. > (MASK_LOAD_ELSE_M1): Ditto. > (MASK_LOAD_ELSE_UNDEFINED): Ditto. > * optabs-query.cc (supports_vec_convert_optab_p): Return icode. > (get_supported_else_val): Return supported else value for > optab's operand at index. > (supports_vec_gather_load_p): Add else argument. > (supports_vec_scatter_store_p): Ditto. > * optabs-query.h (supports_vec_gather_load_p): Ditto. > (get_supported_else_val): Ditto. > * optabs-tree.cc (target_supports_mask_load_store_p): Ditto. > (can_vec_mask_load_store_p): Ditto. > (target_supports_len_load_store_p): Ditto. > (get_len_load_store_mode): Ditto. > * optabs-tree.h (target_supports_mask_load_store_p): Ditto. > (can_vec_mask_load_store_p): Ditto. > * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto. > (vect_gather_scatter_fn_p): Ditto. > (vect_check_gather_scatter): Ditto. > (vect_load_lanes_supported): Ditto. > * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): > Ditto. > * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for > else operand. > (vect_slp_analyze_node_operations): Skip undefined else operand. > * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p): > Add else operand handling. > (vect_get_vec_defs_for_operand): Handle undefined else operand. > (check_load_store_for_partial_vectors): Add else argument. > (vect_truncate_gather_scatter_offset): Ditto. > (vect_use_strided_gather_scatters_p): Ditto. > (get_group_load_store_type): Ditto. > (get_load_store_type): Ditto. > (vect_get_mask_load_else): Ditto. > (vect_get_else_val_from_tree): Ditto. > (vect_build_one_gather_load_call): Add zero else operand. > (vectorizable_load): Use else operand. > * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else > argument. > (vect_load_lanes_supported): Ditto. > (vect_get_mask_load_else): Ditto. > (vect_get_else_val_from_tree): Ditto. > --- > gcc/internal-fn.cc | 19 +++- > gcc/internal-fn.h | 11 +- > gcc/optabs-query.cc | 83 +++++++++++--- > gcc/optabs-query.h | 3 +- > gcc/optabs-tree.cc | 43 +++++--- > gcc/optabs-tree.h | 8 +- > gcc/tree-vect-data-refs.cc | 39 +++++-- > gcc/tree-vect-patterns.cc | 17 ++- > gcc/tree-vect-slp.cc | 22 +++- > gcc/tree-vect-stmts.cc | 218 +++++++++++++++++++++++++++++-------- > gcc/tree-vectorizer.h | 11 +- > 11 files changed, 367 insertions(+), 107 deletions(-) > > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index 586978e8f3f..2fc676e397c 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -4988,12 +4988,15 @@ internal_fn_stored_value_index (internal_fn fn) > or stored. OFFSET_VECTOR_TYPE is the vector type that holds the > offset from the shared base address of each loaded or stored element. > SCALE is the amount by which these offsets should be multiplied > - *after* they have been extended to address width. */ > + *after* they have been extended to address width. 
> + If the target supports the gather load the supported else value > + will be written to the position ELSVAL points to if it is nonzero. */ > > bool > internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type, > tree memory_element_type, > - tree offset_vector_type, int scale) > + tree offset_vector_type, int scale, > + int *elsval) > { > if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)), > TYPE_SIZE (memory_element_type))) > @@ -5006,9 +5009,15 @@ internal_gather_scatter_fn_supported_p (internal_fn > ifn, tree vector_type, > TYPE_MODE (offset_vector_type)); > int output_ops = internal_load_fn_p (ifn) ? 1 : 0; > bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type)); > - return (icode != CODE_FOR_nothing > - && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p)) > - && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale))); > + bool ok = false;
That = false looks like a dead assignment.

> +  ok = icode != CODE_FOR_nothing
> +       && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
> +       && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
> +
> +  if (ok && elsval)
> +    *elsval = get_supported_else_val (icode, 6);
> +
> +  return ok;
>  }
>
>  /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 2785a5a95a2..7b301732069 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -240,9 +240,18 @@ extern int internal_fn_len_index (internal_fn);
>  extern int internal_fn_else_index (internal_fn);
>  extern int internal_fn_stored_value_index (internal_fn);
>  extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
> -                                                    tree, tree, int);
> +                                                    tree, tree, int,
> +                                                    int * = nullptr);
>  extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
>                                                  poly_uint64, unsigned int);
> +
> +/* Integer constants representing which else value is supported for masked
> +   load functions.  */
> +#define MASK_LOAD_ELSE_NONE 0

What is MASK_LOAD_ELSE_NONE used for?  It's for the case there isn't any
masking, right?

> +#define MASK_LOAD_ELSE_ZERO -1
> +#define MASK_LOAD_ELSE_M1 -2
> +#define MASK_LOAD_ELSE_UNDEFINED -3
> +
>  #define VECT_PARTIAL_BIAS_UNSUPPORTED 127
>
>  extern signed char internal_len_load_store_bias (internal_fn ifn,
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 5149de57468..93c1d7b8485 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "rtl.h"
>  #include "recog.h"
>  #include "vec-perm-indices.h"
> +#include "internal-fn.h"
> +#include "memmodel.h"
> +#include "optabs.h"
>
>  struct target_optabs default_target_optabs;
>  struct target_optabs *this_fn_optabs = &default_target_optabs;
> @@ -665,34 +668,74 @@ lshift_cheap_p (bool speed_p)
>     that mode, given that the second mode is always an integer vector.
>     If MODE is VOIDmode, return true if OP supports any vector mode.  */
>
> -static bool
> +static enum insn_code
>  supports_vec_convert_optab_p (optab op, machine_mode mode)
>  {
>    int start = mode == VOIDmode ? 0 : mode;
>    int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode;
> +  enum insn_code icode = CODE_FOR_nothing;
>    for (int i = start; i <= end; ++i)
>      if (VECTOR_MODE_P ((machine_mode) i))
>        for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
> -        if (convert_optab_handler (op, (machine_mode) i,
> -                                   (machine_mode) j) != CODE_FOR_nothing)
> -          return true;
> +        {
> +          if ((icode
> +               = convert_optab_handler (op, (machine_mode) i,
> +                                        (machine_mode) j)) != CODE_FOR_nothing)
> +            return icode;
> +        }
>
> -  return false;
> +  return icode;
>  }
>
> +/* Return the supported else value for the optab referred to by ICODE.  The
> +   index of the else operand must be specified in ELS_INDEX.
> +   If no else value is supported, return MASK_LOAD_ELSE_NONE.  */
> +int
> +get_supported_else_val (enum insn_code icode, unsigned els_index)
> +{
> +  const struct insn_data_d *data = &insn_data[icode];
> +  machine_mode els_mode = data->operand[els_index].mode;
> +
> +  /* For now we only support else values of 0, -1 and "undefined".  */
> +  /* ??? Does a -1 constant make sense for anything but integer?  */

All bits set probably makes sense for all component modes.  But we can
worry about that when we need it.
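When we do, matching all-ones for a float component mode would have to go
through the integer view of the mode, since CONSTM1_RTX only exists for
(vector) integer modes.  Roughly (an untested sketch, not something this
patch needs):

  /* Sketch: match an all-ones else value for any component mode via the
     equal-sized integer vector mode and reinterpret its bits.  */
  machine_mode imode;
  if (related_int_vector_mode (els_mode).exists (&imode)
      && insn_operand_matches (icode, els_index,
                               simplify_gen_subreg (els_mode,
                                                    CONSTM1_RTX (imode),
                                                    imode, 0)))
    return MASK_LOAD_ELSE_M1;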
> +  if (GET_MODE_CLASS (els_mode) == MODE_VECTOR_INT
> +      && insn_operand_matches (icode, els_index, CONSTM1_RTX (els_mode)))
> +    {
> +      return MASK_LOAD_ELSE_M1;
> +    }
> +  else if (insn_operand_matches (icode, els_index, gen_rtx_SCRATCH (els_mode)))
> +    {
> +      return MASK_LOAD_ELSE_UNDEFINED;
> +    }
> +  else if (insn_operand_matches (icode, els_index, CONST0_RTX (els_mode)))
> +    {
> +      return MASK_LOAD_ELSE_ZERO;
> +    }

Why this particular order?  I'd perform the CONST0_RTX matching first and
the CONSTM1_RTX matching last.

I would have expected "supported else value" to report a set of supported
values; as it stands the setup doesn't leave any choice to the vectorizer
but forces the first match in the above order?  So maybe
get_default_else_val (...) instead?  And factor out a
supported_else_val_p (enum insn_code, unsigned, int) using that, for the
case the vectorizer absolutely wants zero-masking for example?

I think these functions and the #defines belong in internal-fn.{h,cc}.

> +  return MASK_LOAD_ELSE_NONE;
> +}
> +
> +
>  /* If MODE is not VOIDmode, return true if vec_gather_load is available for
>     that mode.  If MODE is VOIDmode, return true if gather_load is available
>     for at least one vector mode.  */
>
>  bool
> -supports_vec_gather_load_p (machine_mode mode)
> +supports_vec_gather_load_p (machine_mode mode, int *elsval)
>  {
> -  if (!this_fn_optabs->supports_vec_gather_load[mode])
> -    this_fn_optabs->supports_vec_gather_load[mode]
> -      = (supports_vec_convert_optab_p (gather_load_optab, mode)
> -         || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
> -         || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
> -         ? 1 : -1);
> +  enum insn_code icode = CODE_FOR_nothing;
> +  if (!this_fn_optabs->supports_vec_gather_load[mode] || elsval)
> +    {
> +      icode = supports_vec_convert_optab_p (gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +        icode = supports_vec_convert_optab_p (mask_gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +        icode = supports_vec_convert_optab_p (mask_len_gather_load_optab, mode);
> +      this_fn_optabs->supports_vec_gather_load[mode]
> +        = (icode != CODE_FOR_nothing) ? 1 : -1;
> +    }
> +
> +  if (elsval && icode != CODE_FOR_nothing)
> +    *elsval = get_supported_else_val (icode, 6);
>
>    return this_fn_optabs->supports_vec_gather_load[mode] > 0;
>  }
> @@ -704,12 +747,18 @@ supports_vec_gather_load_p (machine_mode mode)
>  bool
>  supports_vec_scatter_store_p (machine_mode mode)
>  {
> +  enum insn_code icode;
>    if (!this_fn_optabs->supports_vec_scatter_store[mode])
> -    this_fn_optabs->supports_vec_scatter_store[mode]
> -      = (supports_vec_convert_optab_p (scatter_store_optab, mode)
> -         || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
> -         || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
> -         ? 1 : -1);
> +    {
> +      icode = supports_vec_convert_optab_p (scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +        icode = supports_vec_convert_optab_p (mask_scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +        icode = supports_vec_convert_optab_p (mask_len_scatter_store_optab,
> +                                              mode);
> +      this_fn_optabs->supports_vec_scatter_store[mode]
> +        = (icode != CODE_FOR_nothing) ? 
1 : -1; > + } > > return this_fn_optabs->supports_vec_scatter_store[mode] > 0; > } > diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h > index 0cb2c21ba85..331832bfad2 100644 > --- a/gcc/optabs-query.h > +++ b/gcc/optabs-query.h > @@ -191,9 +191,10 @@ bool can_compare_and_swap_p (machine_mode, bool); > bool can_atomic_exchange_p (machine_mode, bool); > bool can_atomic_load_p (machine_mode); > bool lshift_cheap_p (bool); > -bool supports_vec_gather_load_p (machine_mode = E_VOIDmode); > +bool supports_vec_gather_load_p (machine_mode = E_VOIDmode, int * = nullptr); > bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode); > bool can_vec_extract (machine_mode, machine_mode); > +int get_supported_else_val (enum insn_code, unsigned); > > /* Version of find_widening_optab_handler_and_mode that operates on > specific mode types. */ > diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc > index b69a5bc3676..68e1eb9167c 100644 > --- a/gcc/optabs-tree.cc > +++ b/gcc/optabs-tree.cc > @@ -554,22 +554,30 @@ target_supports_op_p (tree type, enum tree_code code, > load/store and return corresponding IFN in the last argument > (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */ > > -static bool > +bool > target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode, > - bool is_load, internal_fn *ifn) > + bool is_load, internal_fn *ifn, > + int *elsval) > { > optab op = is_load ? maskload_optab : maskstore_optab; > optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab; > - if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing) > + enum insn_code icode; > + if ((icode = convert_optab_handler (op, mode, mask_mode)) > + != CODE_FOR_nothing) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE; > + if (elsval) > + *elsval = get_supported_else_val (icode, 3); > return true; > } > - else if (convert_optab_handler (len_op, mode, mask_mode) != > CODE_FOR_nothing) > + else if ((icode = convert_optab_handler (len_op, mode, mask_mode)) > + != CODE_FOR_nothing) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; > + if (elsval) > + *elsval = get_supported_else_val (icode, 3); > return true; > } > return false; > @@ -584,13 +592,15 @@ bool > can_vec_mask_load_store_p (machine_mode mode, > machine_mode mask_mode, > bool is_load, > - internal_fn *ifn) > + internal_fn *ifn, > + int *elsval) > { > machine_mode vmode; > > /* If mode is vector mode, check it directly. */ > if (VECTOR_MODE_P (mode)) > - return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn); > + return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn, > + elsval); > > /* Otherwise, return true if there is some vector mode with > the mask load/store supported. 
*/ > @@ -604,7 +614,8 @@ can_vec_mask_load_store_p (machine_mode mode, > vmode = targetm.vectorize.preferred_simd_mode (smode); > if (VECTOR_MODE_P (vmode) > && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) > - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn)) > + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn, > + elsval)) > return true; > > auto_vector_modes vector_modes; > @@ -612,7 +623,8 @@ can_vec_mask_load_store_p (machine_mode mode, > for (machine_mode base_mode : vector_modes) > if (related_vector_mode (base_mode, smode).exists (&vmode) > && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) > - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn)) > + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn, > + elsval)) > return true; > return false; > } > @@ -626,7 +638,7 @@ can_vec_mask_load_store_p (machine_mode mode, > > static bool > target_supports_len_load_store_p (machine_mode mode, bool is_load, > - internal_fn *ifn) > + internal_fn *ifn, int *elsval) > { > optab op = is_load ? len_load_optab : len_store_optab; > optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab; > @@ -638,11 +650,15 @@ target_supports_len_load_store_p (machine_mode mode, > bool is_load, > return true; > } > machine_mode mask_mode; > + enum insn_code icode; > if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode) > - && convert_optab_handler (masked_op, mode, mask_mode) != > CODE_FOR_nothing) > + && ((icode = convert_optab_handler (masked_op, mode, mask_mode)) > + != CODE_FOR_nothing)) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; > + if (elsval) > + *elsval = get_supported_else_val (icode, 3); > return true; > } > return false; > @@ -659,19 +675,20 @@ target_supports_len_load_store_p (machine_mode mode, > bool is_load, > which optab is supported in the target. */ > > opt_machine_mode > -get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn) > +get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn, > + int *elsval) > { > gcc_assert (VECTOR_MODE_P (mode)); > > /* Check if length in lanes supported for this mode directly. */ > - if (target_supports_len_load_store_p (mode, is_load, ifn)) > + if (target_supports_len_load_store_p (mode, is_load, ifn, elsval)) > return mode; > > /* Check if length in bytes supported for same vector size VnQI. 
*/ > machine_mode vmode; > poly_uint64 nunits = GET_MODE_SIZE (mode); > if (related_vector_mode (mode, QImode, nunits).exists (&vmode) > - && target_supports_len_load_store_p (vmode, is_load, ifn)) > + && target_supports_len_load_store_p (vmode, is_load, ifn, elsval)) > return vmode; > > return opt_machine_mode (); > diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h > index f2b49991462..117118c02fc 100644 > --- a/gcc/optabs-tree.h > +++ b/gcc/optabs-tree.h > @@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree, enum tree_code); > void init_tree_optimization_optabs (tree); > bool target_supports_op_p (tree, enum tree_code, > enum optab_subtype = optab_default); > +bool target_supports_mask_load_store_p (machine_mode, machine_mode, > + bool, internal_fn *, int *); > bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool, > - internal_fn * = nullptr); > + internal_fn * = nullptr, > + int * = nullptr); > opt_machine_mode get_len_load_store_mode (machine_mode, bool, > - internal_fn * = nullptr); > + internal_fn * = nullptr, > + int * = nullptr); > > #endif > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc > index 39fd887a96b..17f3cbbdb6c 100644 > --- a/gcc/tree-vect-data-refs.cc > +++ b/gcc/tree-vect-data-refs.cc > @@ -54,13 +54,15 @@ along with GCC; see the file COPYING3. If not see > #include "vec-perm-indices.h" > #include "internal-fn.h" > #include "gimple-fold.h" > +#include "optabs-query.h" > > /* Return true if load- or store-lanes optab OPTAB is implemented for > COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ > > static bool > vect_lanes_optab_supported_p (const char *name, convert_optab optab, > - tree vectype, unsigned HOST_WIDE_INT count) > + tree vectype, unsigned HOST_WIDE_INT count, > + int *elsval = nullptr) > { > machine_mode mode, array_mode; > bool limit_p; > @@ -80,7 +82,9 @@ vect_lanes_optab_supported_p (const char *name, > convert_optab optab, > } > } > > - if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) > + enum insn_code icode; > + if ((icode = convert_optab_handler (optab, array_mode, mode)) > + == CODE_FOR_nothing) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -94,6 +98,9 @@ vect_lanes_optab_supported_p (const char *name, > convert_optab optab, > "can use %s<%s><%s>\n", name, GET_MODE_NAME > (array_mode), > GET_MODE_NAME (mode)); > > + if (elsval) > + *elsval = get_supported_else_val (icode, 3); > + > return true; > } > > @@ -4176,7 +4183,7 @@ bool > vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, > tree vectype, tree memory_type, tree offset_type, > int scale, internal_fn *ifn_out, > - tree *offset_vectype_out) > + tree *offset_vectype_out, int *elsval) > { > unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); > unsigned int element_bits = vector_element_bits (vectype); > @@ -4214,7 +4221,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, > bool masked_p, > > /* Test whether the target supports this combination. 
*/ > if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, > - offset_vectype, scale)) > + offset_vectype, scale, > + elsval)) > { > *ifn_out = ifn; > *offset_vectype_out = offset_vectype; > @@ -4275,7 +4283,7 @@ vect_describe_gather_scatter_call (stmt_vec_info > stmt_info, > > bool > vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, > - gather_scatter_info *info) > + gather_scatter_info *info, int *elsval) > { > HOST_WIDE_INT scale = 1; > poly_int64 pbitpos, pbitsize; > @@ -4299,6 +4307,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > ifn = gimple_call_internal_fn (call); > if (internal_gather_scatter_fn_p (ifn)) > { > + /* Extract the else value from a masked-load call. This is > + necessary when we created a gather_scatter pattern from a > + maskload. It is a bit cumbersome to basically create the > + same else value three times but it's probably acceptable until > + tree-ifcvt goes away. */ > + if (internal_fn_mask_index (ifn) >= 0 && elsval) > + { > + tree els = gimple_call_arg (call, internal_fn_else_index (ifn)); > + *elsval = vect_get_else_val_from_tree (els); > + } > vect_describe_gather_scatter_call (stmt_info, info); > return true; > } > @@ -4308,7 +4326,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > /* True if we should aim to use internal functions rather than > built-in functions. */ > bool use_ifn_p = (DR_IS_READ (dr) > - ? supports_vec_gather_load_p (TYPE_MODE (vectype)) > + ? supports_vec_gather_load_p (TYPE_MODE (vectype), > + elsval) > : supports_vec_scatter_store_p (TYPE_MODE (vectype))); > > base = DR_REF (dr); > @@ -6388,23 +6407,23 @@ vect_grouped_load_supported (tree vectype, bool > single_element_p, > > internal_fn > vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, > - bool masked_p) > + bool masked_p, int *elsval) > { > if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes", > vec_mask_len_load_lanes_optab, vectype, > - count)) > + count, elsval)) > return IFN_MASK_LEN_LOAD_LANES; > else if (masked_p) > { > if (vect_lanes_optab_supported_p ("vec_mask_load_lanes", > vec_mask_load_lanes_optab, vectype, > - count)) > + count, elsval)) > return IFN_MASK_LOAD_LANES; > } > else > { > if (vect_lanes_optab_supported_p ("vec_load_lanes", > vec_load_lanes_optab, > - vectype, count)) > + vectype, count, elsval)) > return IFN_LOAD_LANES; > } > return IFN_LAST; > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index 4674a16d15f..3bee280fd91 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -6466,7 +6466,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, > /* Make sure that the target supports an appropriate internal > function for the gather/scatter operation. */ > gather_scatter_info gs_info; > - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info) > + int elsval; > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info, &elsval) > || gs_info.ifn == IFN_LAST) > return NULL; > > @@ -6489,20 +6490,26 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, > tree offset = vect_add_conversion_to_pattern (vinfo, offset_type, > gs_info.offset, stmt_info); > > + tree vec_els = NULL_TREE; > /* Build the new pattern statement. 
*/ > tree scale = size_int (gs_info.scale); > gcall *pattern_stmt; > + tree load_lhs; > if (DR_IS_READ (dr)) > { > tree zero = build_zero_cst (gs_info.element_type); > if (mask != NULL) > - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base, > - offset, scale, zero, mask); > + { > + vec_els = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype)); > + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base, > + offset, scale, zero, mask, > + vec_els); > + } > else > pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, > offset, scale, zero); > - tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); > - gimple_call_set_lhs (pattern_stmt, load_lhs); > + load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); > + gimple_set_lhs (pattern_stmt, load_lhs); > } > else > { > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index 5f0d9e51c32..22448ec9917 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -507,13 +507,13 @@ static const int cond_expr_maps[3][5] = { > }; > static const int arg0_map[] = { 1, 0 }; > static const int arg1_map[] = { 1, 1 }; > -static const int arg2_map[] = { 1, 2 }; > -static const int arg1_arg4_map[] = { 2, 1, 4 }; > +static const int arg2_arg3_map[] = { 2, 2, 3 }; > +static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 }; > static const int arg3_arg2_map[] = { 2, 3, 2 }; > static const int op1_op0_map[] = { 2, 1, 0 }; > static const int off_map[] = { 1, -3 }; > static const int off_op0_map[] = { 2, -3, 0 }; > -static const int off_arg2_map[] = { 2, -3, 2 }; > +static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 }; > static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 }; > static const int mask_call_maps[6][7] = { > { 1, 1, }, > @@ -560,14 +560,14 @@ vect_get_operand_map (const gimple *stmt, bool > gather_scatter_p = false, > switch (gimple_call_internal_fn (call)) > { > case IFN_MASK_LOAD: > - return gather_scatter_p ? off_arg2_map : arg2_map; > + return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map; > > case IFN_GATHER_LOAD: > return arg1_map; > > case IFN_MASK_GATHER_LOAD: > case IFN_MASK_LEN_GATHER_LOAD: > - return arg1_arg4_map; > + return arg1_arg4_arg5_map; > > case IFN_MASK_STORE: > return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map; > @@ -6818,6 +6818,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, > slp_tree node, > tree vector_type = SLP_TREE_VECTYPE (child); > if (!vector_type) > { > + /* Masked loads can have an undefined (default SSA definition) > + else operand. We do not need to cost it. */ > + vec<tree> ops = SLP_TREE_SCALAR_OPS (child); > + if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node)) > + == load_vec_info_type) > + && ((ops.length () && > + TREE_CODE (ops[0]) == SSA_NAME > + && SSA_NAME_IS_DEFAULT_DEF (ops[0]) > + && VAR_P (SSA_NAME_VAR (ops[0]))) > + || SLP_TREE_DEF_TYPE (child) == vect_constant_def)) > + continue; > + > /* For shifts with a scalar argument we don't need > to cost or code-generate anything. > ??? Represent this more explicitely. */ > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index 20cae83e820..9e721c72ddf 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3. If not see > #include "regs.h" > #include "attribs.h" > #include "optabs-libfuncs.h" > +#include "tree-dfa.h" > > /* For lang_hooks.types.type_for_mode. 
*/ > #include "langhooks.h" > @@ -467,6 +468,10 @@ exist_non_indexing_operands_for_use_p (tree use, > stmt_vec_info stmt_info) > if (mask_index >= 0 > && use == gimple_call_arg (call, mask_index)) > return true; > + int els_index = internal_fn_else_index (ifn); > + if (els_index >= 0 > + && use == gimple_call_arg (call, els_index)) > + return true; > int stored_value_index = internal_fn_stored_value_index (ifn); > if (stored_value_index >= 0 > && use == gimple_call_arg (call, stored_value_index)) > @@ -1278,7 +1283,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, > stmt_vec_info stmt_vinfo, > vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op)); > > gcc_assert (vector_type); > - tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL); > + /* A masked load can have a default SSA definition as else operand. > + We should "vectorize" this instead of creating a duplicate from the > + scalar default. */ > + tree vop; > + if (TREE_CODE (op) == SSA_NAME > + && SSA_NAME_IS_DEFAULT_DEF (op) > + && VAR_P (SSA_NAME_VAR (op))) > + vop = get_or_create_ssa_default_def (cfun, > + create_tmp_var (vector_type)); > + else > + vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL); > while (ncopies--) > vec_oprnds->quick_push (vop); > } > @@ -1500,7 +1515,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > vect_memory_access_type > memory_access_type, > gather_scatter_info *gs_info, > - tree scalar_mask) > + tree scalar_mask, > + int *elsval = nullptr) > { > /* Invariant loads need no special support. */ > if (memory_access_type == VMAT_INVARIANT) > @@ -1519,7 +1535,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > if (memory_access_type == VMAT_LOAD_STORE_LANES) > { > internal_fn ifn > - = (is_load ? vect_load_lanes_supported (vectype, group_size, true) > + = (is_load ? vect_load_lanes_supported (vectype, group_size, true, > + elsval) > : vect_store_lanes_supported (vectype, group_size, true)); > if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES) > vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); > @@ -1549,7 +1566,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > if (internal_gather_scatter_fn_supported_p (len_ifn, vectype, > gs_info->memory_type, > gs_info->offset_vectype, > - gs_info->scale)) > + gs_info->scale, > + elsval)) > vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); > else if (internal_gather_scatter_fn_supported_p (ifn, vectype, > gs_info->memory_type, > @@ -1608,7 +1626,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > machine_mode mask_mode; > machine_mode vmode; > bool using_partial_vectors_p = false; > - if (get_len_load_store_mode (vecmode, is_load).exists (&vmode)) > + if (get_len_load_store_mode > + (vecmode, is_load, nullptr, elsval).exists (&vmode)) > { > nvectors = group_memory_nvectors (group_size * vf, nunits); > unsigned factor = (vecmode == vmode) ? 
1 : GET_MODE_UNIT_SIZE > (vecmode); > @@ -1616,7 +1635,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > using_partial_vectors_p = true; > } > else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode) > - && can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) > + && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL, > + elsval)) > { > nvectors = group_memory_nvectors (group_size * vf, nunits); > vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, > scalar_mask); > @@ -1678,7 +1698,8 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree > mask_type, tree loop_mask, > static bool > vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, bool masked_p, > - gather_scatter_info *gs_info) > + gather_scatter_info *gs_info, > + int *elsval) > { > dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); > data_reference *dr = dr_info->dr; > @@ -1735,7 +1756,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info > stmt_info, > tree memory_type = TREE_TYPE (DR_REF (dr)); > if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, > vectype, memory_type, offset_type, scale, > - &gs_info->ifn, &gs_info->offset_vectype) > + &gs_info->ifn, &gs_info->offset_vectype, > + elsval) > || gs_info->ifn == IFN_LAST) > continue; > > @@ -1768,12 +1790,13 @@ vect_truncate_gather_scatter_offset (stmt_vec_info > stmt_info, > static bool > vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, bool masked_p, > - gather_scatter_info *gs_info) > + gather_scatter_info *gs_info, > + int *elsval) > { > - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info) > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsval) > || gs_info->ifn == IFN_LAST) > return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, > - masked_p, gs_info); > + masked_p, gs_info, elsval); > > tree old_offset_type = TREE_TYPE (gs_info->offset); > tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); > @@ -1986,7 +2009,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > dr_alignment_support *alignment_support_scheme, > int *misalignment, > gather_scatter_info *gs_info, > - internal_fn *lanes_ifn) > + internal_fn *lanes_ifn, > + int *elsval) > { > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; > @@ -2220,7 +2244,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > /* Otherwise try using LOAD/STORE_LANES. */ > *lanes_ifn > = vls_type == VLS_LOAD > - ? vect_load_lanes_supported (vectype, group_size, masked_p) > + ? 
vect_load_lanes_supported (vectype, group_size, masked_p,
> +                                       elsval)
>            : vect_store_lanes_supported (vectype, group_size,
>                                          masked_p);
>        if (*lanes_ifn != IFN_LAST)
> @@ -2253,7 +2278,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>        && single_element_p
>        && loop_vinfo
>        && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                             masked_p, gs_info))
> +                                             masked_p, gs_info, elsval))
>      *memory_access_type = VMAT_GATHER_SCATTER;
>  }
>
> @@ -2328,7 +2353,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>                       dr_alignment_support *alignment_support_scheme,
>                       int *misalignment,
>                       gather_scatter_info *gs_info,
> -                     internal_fn *lanes_ifn)
> +                     internal_fn *lanes_ifn,
> +                     int *elsval = nullptr)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> @@ -2337,7 +2363,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>      {
>        *memory_access_type = VMAT_GATHER_SCATTER;
> -      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
> +      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> +                                      elsval))
>          gcc_unreachable ();
>        /* When using internal functions, we rely on pattern recognition
>           to convert the type of the offset to the type that the target
> @@ -2391,7 +2418,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>                                             masked_p,
>                                             vls_type, memory_access_type, poffset,
>                                             alignment_support_scheme,
> -                                           misalignment, gs_info, lanes_ifn))
> +                                           misalignment, gs_info, lanes_ifn,
> +                                           elsval))
>          return false;
>      }
>    else if (STMT_VINFO_STRIDED_P (stmt_info))
> @@ -2399,7 +2427,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>        gcc_assert (!slp_node);
>        if (loop_vinfo
>            && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                                 masked_p, gs_info))
> +                                                 masked_p, gs_info, elsval))
>          *memory_access_type = VMAT_GATHER_SCATTER;
>        else
>          *memory_access_type = VMAT_ELEMENTWISE;
> @@ -2667,6 +2695,52 @@ vect_build_zero_merge_argument (vec_info *vinfo,
>    return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
>  }
>
> +/* Return the supported else value for a masked load internal function IFN.
> +   The vector type is given in VECTYPE and the mask type in VECTYPE2.
> +   TYPE specifies the type of the returned else value.  */
> +
> +tree
> +vect_get_mask_load_else (int elsval, tree type)
> +{
> +  tree els;
> +  if (elsval == MASK_LOAD_ELSE_UNDEFINED)
> +    {
> +      tree tmp = create_tmp_var (type);
> +      /* No need to warn about anything.  */
> +      TREE_NO_WARNING (tmp) = 1;
> +      els = get_or_create_ssa_default_def (cfun, tmp);
> +    }
> +  else if (elsval == MASK_LOAD_ELSE_M1)
> +    els = build_minus_one_cst (type);
> +  else if (elsval == MASK_LOAD_ELSE_ZERO)
> +    els = build_zero_cst (type);
> +  else
> +    __builtin_unreachable ();
> +
> +  return els;
> +}
> +
> +/* Return the integer define a tree else operand ELS represents.
> +   This performs the inverse of vect_get_mask_load_else.  Refer to
> +   vect_check_gather_scatter for its usage rationale.  */
> +int
> +vect_get_else_val_from_tree (tree els)
> +{
> +  if (TREE_CODE (els) == SSA_NAME
> +      && SSA_NAME_IS_DEFAULT_DEF (els))

&& TREE_CODE (SSA_NAME_VAR (els)) == VAR_DECL

> +    return MASK_LOAD_ELSE_UNDEFINED;
> +  else

else if?
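I.e. I'd structure the whole thing along these lines (just a sketch, with
the VAR_DECL check from above and, as mentioned below, gcc_unreachable ()
for the cannot-happen case):

  if (TREE_CODE (els) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (els)
      && TREE_CODE (SSA_NAME_VAR (els)) == VAR_DECL)
    return MASK_LOAD_ELSE_UNDEFINED;
  else if (zerop (els))
    return MASK_LOAD_ELSE_ZERO;
  else if (integer_minus_onep (els))
    return MASK_LOAD_ELSE_M1;
  else
    gcc_unreachable ();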
> +    {
> +      if (zerop (els))
> +        return MASK_LOAD_ELSE_ZERO;
> +      else if (integer_minus_onep (els))
> +        return MASK_LOAD_ELSE_M1;
> +      else
> +        return MASK_LOAD_ELSE_NONE;

I think this should be gcc_unreachable () instead.  We shouldn't answer
NONE when passing in 2.

> +    }
> +}
> +
>  /* Build a gather load call while vectorizing STMT_INFO.  Insert new
>     instructions before GSI and add them to VEC_STMT.  GS_INFO describes
>     the gather load operation.  If the load is conditional, MASK is the
> @@ -2748,8 +2822,20 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
>      }
>
>    tree scale = build_int_cst (scaletype, gs_info->scale);
> -  gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> -                                        mask_op, scale);
> +  gimple *new_stmt;
> +
> +  /* ??? Rather than trying to querying a builtin's predicates
> +     in a cumbersome way go with a zero else value.
> +     As this vectorizer path is x86 only and x86 gather loads
> +     always zero-fill masked elements a hard-coded zero else value
> +     seems reasonable.  */

But did you adjust the x86 builtin decls?  I'd just leave those alone and
have an implicit zero else value here - what's the point in making it
explicit but hardcoded?

The rest looks OK to me.

> +  tree vec_els = build_zero_cst (vectype);
> +  if (!mask)
> +    new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> +                                  mask_op, scale);
> +  else
> +    new_stmt = gimple_build_call (gs_info->decl, 6, src_op, ptr, op,
> +                                  mask_op, vec_els, scale);
>
>    if (!useless_type_conversion_p (vectype, rettype))
>      {
> @@ -9832,6 +9918,7 @@ vectorizable_load (vec_info *vinfo,
>    gather_scatter_info gs_info;
>    tree ref_type;
>    enum vect_def_type mask_dt = vect_unknown_def_type;
> +  enum vect_def_type els_dt = vect_unknown_def_type;
>
>    if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>      return false;
> @@ -9844,8 +9931,12 @@ vectorizable_load (vec_info *vinfo,
>      return false;
>
>    tree mask = NULL_TREE, mask_vectype = NULL_TREE;
> +  tree els = NULL_TREE; tree els_vectype = NULL_TREE;
> +
>    int mask_index = -1;
> +  int els_index = -1;
>    slp_tree slp_op = NULL;
> +  slp_tree els_op = NULL;
>    if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
>      {
>        scalar_dest = gimple_assign_lhs (assign);
> @@ -9885,6 +9976,15 @@ vectorizable_load (vec_info *vinfo,
>            && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
>                                        &mask, &slp_op, &mask_dt, &mask_vectype))
>          return false;
> +
> +      els_index = internal_fn_else_index (ifn);
> +      if (els_index >= 0 && slp_node)
> +        els_index = vect_slp_child_index_for_operand
> +          (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
> +      if (els_index >= 0
> +          && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index,
> +                                  &els, &els_op, &els_dt, &els_vectype))
> +        return false;
>      }
>
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> @@ -10027,10 +10127,11 @@ vectorizable_load (vec_info *vinfo,
>    int misalignment;
>    poly_int64 poffset;
>    internal_fn lanes_ifn;
> +  int elsval;
>    if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask,
>                              VLS_LOAD,
>                              ncopies, &memory_access_type, &poffset,
>                              &alignment_support_scheme, &misalignment, &gs_info,
> -                            &lanes_ifn))
> +                            &lanes_ifn, &elsval))
>      return false;
>
>    if (mask)
> @@ -10040,7 +10141,8 @@ vectorizable_load (vec_info *vinfo,
>        machine_mode vec_mode = TYPE_MODE (vectype);
>        if (!VECTOR_MODE_P (vec_mode)
>            || !can_vec_mask_load_store_p (vec_mode,
> -                                         TYPE_MODE (mask_vectype), true))
> +                                         TYPE_MODE (mask_vectype),
> +                                         true, NULL, &elsval))
>          return false;
>      }
>    else if 
(memory_access_type != VMAT_LOAD_STORE_LANES > @@ -10771,6 +10873,7 @@ vectorizable_load (vec_info *vinfo, > } > > tree vec_mask = NULL_TREE; > + tree vec_els = NULL_TREE; > if (memory_access_type == VMAT_LOAD_STORE_LANES) > { > gcc_assert (alignment_support_scheme == dr_aligned > @@ -10860,6 +10963,9 @@ vectorizable_load (vec_info *vinfo, > } > } > > + if (loop_masks || final_mask) > + vec_els = vect_get_mask_load_else (elsval, vectype); > + > gcall *call; > if (final_len && final_mask) > { > @@ -10868,9 +10974,10 @@ vectorizable_load (vec_info *vinfo, > VEC_MASK, LEN, BIAS). */ > unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5, > + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6, > dataref_ptr, alias_ptr, > - final_mask, final_len, bias); > + final_mask, vec_els, > + final_len, bias); > } > else if (final_mask) > { > @@ -10879,9 +10986,9 @@ vectorizable_load (vec_info *vinfo, > VEC_MASK). */ > unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > - call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, > + call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4, > dataref_ptr, alias_ptr, > - final_mask); > + final_mask, vec_els); > } > else > { > @@ -11023,17 +11130,27 @@ vectorizable_load (vec_info *vinfo, > } > } > > + if (final_mask) > + vec_els = vect_get_mask_load_else (elsval, vectype); > + > gcall *call; > if (final_len && final_mask) > - call > - = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7, > - dataref_ptr, vec_offset, > - scale, zero, final_mask, > - final_len, bias); > + { > + call > + = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, > + 8, dataref_ptr, > + vec_offset, scale, zero, > + final_mask, vec_els, > + final_len, bias); > + } > else if (final_mask) > - call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5, > - dataref_ptr, vec_offset, > - scale, zero, final_mask); > + { > + call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, > + 6, dataref_ptr, > + vec_offset, scale, > + zero, final_mask, > + vec_els); > + } > else > call = gimple_build_call_internal (IFN_GATHER_LOAD, 4, > dataref_ptr, vec_offset, > @@ -11347,6 +11464,7 @@ vectorizable_load (vec_info *vinfo, > tree final_mask = NULL_TREE; > tree final_len = NULL_TREE; > tree bias = NULL_TREE; > + > if (!costing_p) > { > if (mask) > @@ -11399,7 +11517,8 @@ vectorizable_load (vec_info *vinfo, > if (loop_lens) > { > opt_machine_mode new_ovmode > - = get_len_load_store_mode (vmode, true, &partial_ifn); > + = get_len_load_store_mode (vmode, true, &partial_ifn, > + &elsval); > new_vmode = new_ovmode.require (); > unsigned factor > = (new_ovmode == vmode) ? 
1 : GET_MODE_UNIT_SIZE (vmode); > @@ -11411,7 +11530,7 @@ vectorizable_load (vec_info *vinfo, > { > if (!can_vec_mask_load_store_p ( > vmode, TYPE_MODE (TREE_TYPE (final_mask)), true, > - &partial_ifn)) > + &partial_ifn, &elsval)) > gcc_unreachable (); > } > > @@ -11439,19 +11558,27 @@ vectorizable_load (vec_info *vinfo, > bias = build_int_cst (intQI_type_node, biasval); > } > > + tree vec_els; > + if (final_len || final_mask) > + vec_els = vect_get_mask_load_else (elsval, vectype); > + > if (final_len) > { > tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > gcall *call; > if (partial_ifn == IFN_MASK_LEN_LOAD) > - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5, > - dataref_ptr, ptr, > - final_mask, final_len, > - bias); > + { > + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, > + 6, dataref_ptr, ptr, > + final_mask, vec_els, > + final_len, bias); > + } > else > - call = gimple_build_call_internal (IFN_LEN_LOAD, 4, > - dataref_ptr, ptr, > - final_len, bias); > + { > + call = gimple_build_call_internal (IFN_LEN_LOAD, 4, > + dataref_ptr, ptr, > + final_len, bias); > + } > gimple_call_set_nothrow (call, true); > new_stmt = call; > data_ref = NULL_TREE; > @@ -11474,9 +11601,10 @@ vectorizable_load (vec_info *vinfo, > else if (final_mask) > { > tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > - gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3, > + gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4, > dataref_ptr, ptr, > - final_mask); > + final_mask, > + vec_els); > gimple_call_set_nothrow (call, true); > new_stmt = call; > data_ref = NULL_TREE; > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index df6c8ada2f7..e14b3f278b4 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2399,9 +2399,11 @@ extern bool vect_slp_analyze_instance_alignment > (vec_info *, slp_instance); > extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); > extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); > extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, > - tree, int, internal_fn *, tree *); > + tree, int, internal_fn *, tree *, > + int * = nullptr); > extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, > - gather_scatter_info *); > + gather_scatter_info *, > + int * = nullptr); > extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, > vec<data_reference_p> *, > vec<int> *, int); > @@ -2419,7 +2421,8 @@ extern tree vect_create_destination_var (tree, tree); > extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); > extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, > bool); > extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); > -extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, > bool); > +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, > + bool, int * = nullptr); > extern void vect_permute_store_chain (vec_info *, vec<tree> &, > unsigned int, stmt_vec_info, > gimple_stmt_iterator *, vec<tree> *); > @@ -2560,6 +2563,8 @@ extern int vect_slp_child_index_for_operand (const > gimple *, int op, bool); > > extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, > gimple_stmt_iterator *); > +extern tree vect_get_mask_load_else (int, tree); > +extern int vect_get_else_val_from_tree (tree els); > > /* In tree-vect-patterns.cc. 
*/
>  extern void
> --

Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
Nuernberg)