On Sun, 11 Aug 2024, Robin Dapp wrote:

> This patch adds an else operand to vectorized masked load calls.
> The current implementation adds else-value arguments to the respective
> target-querying functions that is used to supply the vectorizer with the
> proper else value.
> 
> Right now, the only spot where a zero else value is actually enforced is
> tree-ifcvt.  Loop masking and other instances of masked loads in the
> vectorizer itself do not use vec_cond_exprs.
> 
> gcc/ChangeLog:
> 
>       * internal-fn.cc (internal_gather_scatter_fn_supported_p): Add
>       else argument.
>       * internal-fn.h (internal_gather_scatter_fn_supported_p): Ditto.
>       (MASK_LOAD_ELSE_NONE): Define.
>       (MASK_LOAD_ELSE_ZERO): Ditto.
>       (MASK_LOAD_ELSE_M1): Ditto.
>       (MASK_LOAD_ELSE_UNDEFINED): Ditto.
>       * optabs-query.cc (supports_vec_convert_optab_p): Return icode.
>       (get_supported_else_val): Return supported else value for
>       optab's operand at index.
>       (supports_vec_gather_load_p): Add else argument.
>       (supports_vec_scatter_store_p): Ditto.
>       * optabs-query.h (supports_vec_gather_load_p): Ditto.
>       (get_supported_else_val): Ditto.
>       * optabs-tree.cc (target_supports_mask_load_store_p): Ditto.
>       (can_vec_mask_load_store_p): Ditto.
>       (target_supports_len_load_store_p): Ditto.
>       (get_len_load_store_mode): Ditto.
>       * optabs-tree.h (target_supports_mask_load_store_p): Ditto.
>       (can_vec_mask_load_store_p): Ditto.
>       * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto.
>       (vect_gather_scatter_fn_p): Ditto.
>       (vect_check_gather_scatter): Ditto.
>       (vect_load_lanes_supported): Ditto.
>       * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
>       Ditto.
>       * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for
>       else operand.
>       (vect_slp_analyze_node_operations): Skip undefined else operand.
>       * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p):
>       Add else operand handling.
>       (vect_get_vec_defs_for_operand): Handle undefined else operand.
>       (check_load_store_for_partial_vectors): Add else argument.
>       (vect_truncate_gather_scatter_offset): Ditto.
>       (vect_use_strided_gather_scatters_p): Ditto.
>       (get_group_load_store_type): Ditto.
>       (get_load_store_type): Ditto.
>       (vect_get_mask_load_else): Ditto.
>       (vect_get_else_val_from_tree): Ditto.
>       (vect_build_one_gather_load_call): Add zero else operand.
>       (vectorizable_load): Use else operand.
>       * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else
>       argument.
>       (vect_load_lanes_supported): Ditto.
>       (vect_get_mask_load_else): Ditto.
>       (vect_get_else_val_from_tree): Ditto.
> ---
>  gcc/internal-fn.cc         |  19 +++-
>  gcc/internal-fn.h          |  11 +-
>  gcc/optabs-query.cc        |  83 +++++++++++---
>  gcc/optabs-query.h         |   3 +-
>  gcc/optabs-tree.cc         |  43 +++++---
>  gcc/optabs-tree.h          |   8 +-
>  gcc/tree-vect-data-refs.cc |  39 +++++--
>  gcc/tree-vect-patterns.cc  |  17 ++-
>  gcc/tree-vect-slp.cc       |  22 +++-
>  gcc/tree-vect-stmts.cc     | 218 +++++++++++++++++++++++++++++--------
>  gcc/tree-vectorizer.h      |  11 +-
>  11 files changed, 367 insertions(+), 107 deletions(-)
> 
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 586978e8f3f..2fc676e397c 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4988,12 +4988,15 @@ internal_fn_stored_value_index (internal_fn fn)
>     or stored.  OFFSET_VECTOR_TYPE is the vector type that holds the
>     offset from the shared base address of each loaded or stored element.
>     SCALE is the amount by which these offsets should be multiplied
> -   *after* they have been extended to address width.  */
> +   *after* they have been extended to address width.
> +   If the target supports the gather load the supported else value
> +   will be written to the position ELSVAL points to if it is nonzero.  */
>  
>  bool
>  internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
>                                       tree memory_element_type,
> -                                     tree offset_vector_type, int scale)
> +                                     tree offset_vector_type, int scale,
> +                                     int *elsval)
>  {
>    if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
>                          TYPE_SIZE (memory_element_type)))
> @@ -5006,9 +5009,15 @@ internal_gather_scatter_fn_supported_p (internal_fn 
> ifn, tree vector_type,
>                                          TYPE_MODE (offset_vector_type));
>    int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
>    bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type));
> -  return (icode != CODE_FOR_nothing
> -       && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
> -       && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
> +  bool ok = false;

That `= false` initializer looks like a dead assignment — `ok` is unconditionally reassigned on the very next statement, so the declaration and the assignment could be merged.

> +  ok = icode != CODE_FOR_nothing
> +    && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
> +    && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
> +
> +  if (ok && elsval)
> +    *elsval = get_supported_else_val (icode, 6);
> +
> +  return ok;
>  }
>  
>  /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 2785a5a95a2..7b301732069 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -240,9 +240,18 @@ extern int internal_fn_len_index (internal_fn);
>  extern int internal_fn_else_index (internal_fn);
>  extern int internal_fn_stored_value_index (internal_fn);
>  extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
> -                                                 tree, tree, int);
> +                                                 tree, tree, int,
> +                                                 int * = nullptr);
>  extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
>                                               poly_uint64, unsigned int);
> +
> +/* Integer constants representing which else value is supported for masked 
> load
> +   functions.  */
> +#define MASK_LOAD_ELSE_NONE 0

What is MASK_LOAD_ELSE_NONE used for?  It's for the case where there
isn't any masking, right?

> +#define MASK_LOAD_ELSE_ZERO -1
> +#define MASK_LOAD_ELSE_M1 -2
> +#define MASK_LOAD_ELSE_UNDEFINED -3
> +
>  #define VECT_PARTIAL_BIAS_UNSUPPORTED 127
>  
>  extern signed char internal_len_load_store_bias (internal_fn ifn,
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 5149de57468..93c1d7b8485 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "rtl.h"
>  #include "recog.h"
>  #include "vec-perm-indices.h"
> +#include "internal-fn.h"
> +#include "memmodel.h"
> +#include "optabs.h"
>  
>  struct target_optabs default_target_optabs;
>  struct target_optabs *this_fn_optabs = &default_target_optabs;
> @@ -665,34 +668,74 @@ lshift_cheap_p (bool speed_p)
>     that mode, given that the second mode is always an integer vector.
>     If MODE is VOIDmode, return true if OP supports any vector mode.  */
>  
> -static bool
> +static enum insn_code
>  supports_vec_convert_optab_p (optab op, machine_mode mode)
>  {
>    int start = mode == VOIDmode ? 0 : mode;
>    int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode;
> +  enum insn_code icode = CODE_FOR_nothing;
>    for (int i = start; i <= end; ++i)
>      if (VECTOR_MODE_P ((machine_mode) i))
>        for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
> -     if (convert_optab_handler (op, (machine_mode) i,
> -                                (machine_mode) j) != CODE_FOR_nothing)
> -       return true;
> +     {
> +       if ((icode
> +            = convert_optab_handler (op, (machine_mode) i,
> +                                     (machine_mode) j)) != CODE_FOR_nothing)
> +         return icode;
> +     }
>  
> -  return false;
> +  return icode;
>  }
>  
> +/* Return the supported else value for the optab referred to by ICODE.  The
> +   index of the else operand must be specified in ELS_INDEX.
> +   If no else value is supported, return MASK_LOAD_ELSE_NONE.  */
> +int
> +get_supported_else_val (enum insn_code icode, unsigned els_index)
> +{
> +  const struct insn_data_d *data = &insn_data[icode];
> +  machine_mode els_mode = data->operand[els_index].mode;
> +
> +  /* For now we only support else values of 0, -1 and "undefined".  */
> +  /* ??? Does a -1 constant make sense for anything but integer?  */

All bits set probably makes sense for all component modes.  But we can
worry about that when we actually need it.

> +  if (GET_MODE_CLASS (els_mode) == MODE_VECTOR_INT
> +      && insn_operand_matches (icode, els_index, CONSTM1_RTX (els_mode)))
> +    {
> +      return MASK_LOAD_ELSE_M1;
> +    }
> +  else if (insn_operand_matches (icode, els_index, gen_rtx_SCRATCH 
> (els_mode)))
> +    {
> +      return MASK_LOAD_ELSE_UNDEFINED;
> +    }
> +  else if (insn_operand_matches (icode, els_index, CONST0_RTX (els_mode)))
> +    {
> +      return MASK_LOAD_ELSE_ZERO;
> +    }

Why this particular order?  I'd perform the CONST0_RTX matching first and
the CONSTM1_RTX matching last.  I would also have expected
"supported else value" to report a *set* of supported values; as written,
the setup leaves no choice to the vectorizer but forces whichever value
the above order picks first.  So maybe name it get_default_else_val (...)
instead?  And factor out a supported_else_val_p (enum insn_code, unsigned,
int) predicate on top of it, for the case where the vectorizer absolutely
wants zero-masking, for example?

I think these functions and the #defines belong in internal-fn.{h,cc}.

> +  return MASK_LOAD_ELSE_NONE;
> +}
> +
> +
>  /* If MODE is not VOIDmode, return true if vec_gather_load is available for
>     that mode.  If MODE is VOIDmode, return true if gather_load is available
>     for at least one vector mode.  */
>  
>  bool
> -supports_vec_gather_load_p (machine_mode mode)
> +supports_vec_gather_load_p (machine_mode mode, int *elsval)
>  {
> -  if (!this_fn_optabs->supports_vec_gather_load[mode])
> -    this_fn_optabs->supports_vec_gather_load[mode]
> -      = (supports_vec_convert_optab_p (gather_load_optab, mode)
> -      || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
> -      || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
> -      ? 1 : -1);
> +  enum insn_code icode = CODE_FOR_nothing;
> +  if (!this_fn_optabs->supports_vec_gather_load[mode] || elsval)
> +    {
> +      icode = supports_vec_convert_optab_p (gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_len_gather_load_optab, mode);
> +      this_fn_optabs->supports_vec_gather_load[mode]
> +     = (icode != CODE_FOR_nothing) ? 1 : -1;
> +    }
> +
> +  if (elsval && icode != CODE_FOR_nothing)
> +    *elsval = get_supported_else_val (icode, 6);
>  
>    return this_fn_optabs->supports_vec_gather_load[mode] > 0;
>  }
> @@ -704,12 +747,18 @@ supports_vec_gather_load_p (machine_mode mode)
>  bool
>  supports_vec_scatter_store_p (machine_mode mode)
>  {
> +  enum insn_code icode;
>    if (!this_fn_optabs->supports_vec_scatter_store[mode])
> -    this_fn_optabs->supports_vec_scatter_store[mode]
> -      = (supports_vec_convert_optab_p (scatter_store_optab, mode)
> -      || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
> -      || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
> -      ? 1 : -1);
> +    {
> +      icode = supports_vec_convert_optab_p (scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_len_scatter_store_optab,
> +                                           mode);
> +      this_fn_optabs->supports_vec_scatter_store[mode]
> +     = (icode != CODE_FOR_nothing) ? 1 : -1;
> +    }
>  
>    return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
>  }
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 0cb2c21ba85..331832bfad2 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -191,9 +191,10 @@ bool can_compare_and_swap_p (machine_mode, bool);
>  bool can_atomic_exchange_p (machine_mode, bool);
>  bool can_atomic_load_p (machine_mode);
>  bool lshift_cheap_p (bool);
> -bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
> +bool supports_vec_gather_load_p (machine_mode = E_VOIDmode, int * = nullptr);
>  bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
>  bool can_vec_extract (machine_mode, machine_mode);
> +int get_supported_else_val (enum insn_code, unsigned);
>  
>  /* Version of find_widening_optab_handler_and_mode that operates on
>     specific mode types.  */
> diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
> index b69a5bc3676..68e1eb9167c 100644
> --- a/gcc/optabs-tree.cc
> +++ b/gcc/optabs-tree.cc
> @@ -554,22 +554,30 @@ target_supports_op_p (tree type, enum tree_code code,
>     load/store and return corresponding IFN in the last argument
>     (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).  */
>  
> -static bool
> +bool
>  target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
> -                                bool is_load, internal_fn *ifn)
> +                                bool is_load, internal_fn *ifn,
> +                                int *elsval)
>  {
>    optab op = is_load ? maskload_optab : maskstore_optab;
>    optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> -  if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing)
> +  enum insn_code icode;
> +  if ((icode = convert_optab_handler (op, mode, mask_mode))
> +      != CODE_FOR_nothing)
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE;
> +      if (elsval)
> +     *elsval = get_supported_else_val (icode, 3);
>        return true;
>      }
> -  else if (convert_optab_handler (len_op, mode, mask_mode) != 
> CODE_FOR_nothing)
> +  else if ((icode = convert_optab_handler (len_op, mode, mask_mode))
> +        != CODE_FOR_nothing)
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> +      if (elsval)
> +     *elsval = get_supported_else_val (icode, 3);
>        return true;
>      }
>    return false;
> @@ -584,13 +592,15 @@ bool
>  can_vec_mask_load_store_p (machine_mode mode,
>                          machine_mode mask_mode,
>                          bool is_load,
> -                        internal_fn *ifn)
> +                        internal_fn *ifn,
> +                        int *elsval)
>  {
>    machine_mode vmode;
>  
>    /* If mode is vector mode, check it directly.  */
>    if (VECTOR_MODE_P (mode))
> -    return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn);
> +    return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn,
> +                                           elsval);
>  
>    /* Otherwise, return true if there is some vector mode with
>       the mask load/store supported.  */
> @@ -604,7 +614,8 @@ can_vec_mask_load_store_p (machine_mode mode,
>    vmode = targetm.vectorize.preferred_simd_mode (smode);
>    if (VECTOR_MODE_P (vmode)
>        && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> -      && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> +      && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> +                                         elsval))
>      return true;
>  
>    auto_vector_modes vector_modes;
> @@ -612,7 +623,8 @@ can_vec_mask_load_store_p (machine_mode mode,
>    for (machine_mode base_mode : vector_modes)
>      if (related_vector_mode (base_mode, smode).exists (&vmode)
>       && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> -     && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> +     && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> +                                           elsval))
>        return true;
>    return false;
>  }
> @@ -626,7 +638,7 @@ can_vec_mask_load_store_p (machine_mode mode,
>  
>  static bool
>  target_supports_len_load_store_p (machine_mode mode, bool is_load,
> -                               internal_fn *ifn)
> +                               internal_fn *ifn, int *elsval)
>  {
>    optab op = is_load ? len_load_optab : len_store_optab;
>    optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> @@ -638,11 +650,15 @@ target_supports_len_load_store_p (machine_mode mode, 
> bool is_load,
>        return true;
>      }
>    machine_mode mask_mode;
> +  enum insn_code icode;
>    if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
> -      && convert_optab_handler (masked_op, mode, mask_mode) != 
> CODE_FOR_nothing)
> +      && ((icode = convert_optab_handler (masked_op, mode, mask_mode))
> +       != CODE_FOR_nothing))
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> +      if (elsval)
> +     *elsval = get_supported_else_val (icode, 3);
>        return true;
>      }
>    return false;
> @@ -659,19 +675,20 @@ target_supports_len_load_store_p (machine_mode mode, 
> bool is_load,
>     which optab is supported in the target.  */
>  
>  opt_machine_mode
> -get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn)
> +get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn,
> +                      int *elsval)
>  {
>    gcc_assert (VECTOR_MODE_P (mode));
>  
>    /* Check if length in lanes supported for this mode directly.  */
> -  if (target_supports_len_load_store_p (mode, is_load, ifn))
> +  if (target_supports_len_load_store_p (mode, is_load, ifn, elsval))
>      return mode;
>  
>    /* Check if length in bytes supported for same vector size VnQI.  */
>    machine_mode vmode;
>    poly_uint64 nunits = GET_MODE_SIZE (mode);
>    if (related_vector_mode (mode, QImode, nunits).exists (&vmode)
> -      && target_supports_len_load_store_p (vmode, is_load, ifn))
> +      && target_supports_len_load_store_p (vmode, is_load, ifn, elsval))
>      return vmode;
>  
>    return opt_machine_mode ();
> diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
> index f2b49991462..117118c02fc 100644
> --- a/gcc/optabs-tree.h
> +++ b/gcc/optabs-tree.h
> @@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
>  void init_tree_optimization_optabs (tree);
>  bool target_supports_op_p (tree, enum tree_code,
>                          enum optab_subtype = optab_default);
> +bool target_supports_mask_load_store_p (machine_mode, machine_mode,
> +                                bool, internal_fn *, int *);
>  bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool,
> -                             internal_fn * = nullptr);
> +                             internal_fn * = nullptr,
> +                             int * = nullptr);
>  opt_machine_mode get_len_load_store_mode (machine_mode, bool,
> -                                       internal_fn * = nullptr);
> +                                       internal_fn * = nullptr,
> +                                       int * = nullptr);
>  
>  #endif
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 39fd887a96b..17f3cbbdb6c 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -54,13 +54,15 @@ along with GCC; see the file COPYING3.  If not see
>  #include "vec-perm-indices.h"
>  #include "internal-fn.h"
>  #include "gimple-fold.h"
> +#include "optabs-query.h"
>  
>  /* Return true if load- or store-lanes optab OPTAB is implemented for
>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
>  
>  static bool
>  vect_lanes_optab_supported_p (const char *name, convert_optab optab,
> -                           tree vectype, unsigned HOST_WIDE_INT count)
> +                           tree vectype, unsigned HOST_WIDE_INT count,
> +                           int *elsval = nullptr)
>  {
>    machine_mode mode, array_mode;
>    bool limit_p;
> @@ -80,7 +82,9 @@ vect_lanes_optab_supported_p (const char *name, 
> convert_optab optab,
>       }
>      }
>  
> -  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
> +  enum insn_code icode;
> +  if ((icode = convert_optab_handler (optab, array_mode, mode))
> +      == CODE_FOR_nothing)
>      {
>        if (dump_enabled_p ())
>       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -94,6 +98,9 @@ vect_lanes_optab_supported_p (const char *name, 
> convert_optab optab,
>                       "can use %s<%s><%s>\n", name, GET_MODE_NAME 
> (array_mode),
>                       GET_MODE_NAME (mode));
>  
> +  if (elsval)
> +    *elsval = get_supported_else_val (icode, 3);
> +
>    return true;
>  }
>  
> @@ -4176,7 +4183,7 @@ bool
>  vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>                         tree vectype, tree memory_type, tree offset_type,
>                         int scale, internal_fn *ifn_out,
> -                       tree *offset_vectype_out)
> +                       tree *offset_vectype_out, int *elsval)
>  {
>    unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
>    unsigned int element_bits = vector_element_bits (vectype);
> @@ -4214,7 +4221,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
> bool masked_p,
>  
>        /* Test whether the target supports this combination.  */
>        if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
> -                                               offset_vectype, scale))
> +                                               offset_vectype, scale,
> +                                               elsval))
>       {
>         *ifn_out = ifn;
>         *offset_vectype_out = offset_vectype;
> @@ -4275,7 +4283,7 @@ vect_describe_gather_scatter_call (stmt_vec_info 
> stmt_info,
>  
>  bool
>  vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
> -                        gather_scatter_info *info)
> +                        gather_scatter_info *info, int *elsval)
>  {
>    HOST_WIDE_INT scale = 1;
>    poly_int64 pbitpos, pbitsize;
> @@ -4299,6 +4307,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>        ifn = gimple_call_internal_fn (call);
>        if (internal_gather_scatter_fn_p (ifn))
>       {
> +       /* Extract the else value from a masked-load call.  This is
> +          necessary when we created a gather_scatter pattern from a
> +          maskload.  It is a bit cumbersome to basically create the
> +          same else value three times but it's probably acceptable until
> +          tree-ifcvt goes away.  */
> +       if (internal_fn_mask_index (ifn) >= 0 && elsval)
> +         {
> +           tree els = gimple_call_arg (call, internal_fn_else_index (ifn));
> +           *elsval = vect_get_else_val_from_tree (els);
> +         }
>         vect_describe_gather_scatter_call (stmt_info, info);
>         return true;
>       }
> @@ -4308,7 +4326,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>    /* True if we should aim to use internal functions rather than
>       built-in functions.  */
>    bool use_ifn_p = (DR_IS_READ (dr)
> -                 ? supports_vec_gather_load_p (TYPE_MODE (vectype))
> +                 ? supports_vec_gather_load_p (TYPE_MODE (vectype),
> +                                               elsval)
>                   : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
>  
>    base = DR_REF (dr);
> @@ -6388,23 +6407,23 @@ vect_grouped_load_supported (tree vectype, bool 
> single_element_p,
>  
>  internal_fn
>  vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
> -                        bool masked_p)
> +                        bool masked_p, int *elsval)
>  {
>    if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
>                                   vec_mask_len_load_lanes_optab, vectype,
> -                                 count))
> +                                 count, elsval))
>      return IFN_MASK_LEN_LOAD_LANES;
>    else if (masked_p)
>      {
>        if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
>                                       vec_mask_load_lanes_optab, vectype,
> -                                     count))
> +                                     count, elsval))
>       return IFN_MASK_LOAD_LANES;
>      }
>    else
>      {
>        if (vect_lanes_optab_supported_p ("vec_load_lanes", 
> vec_load_lanes_optab,
> -                                     vectype, count))
> +                                     vectype, count, elsval))
>       return IFN_LOAD_LANES;
>      }
>    return IFN_LAST;
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 4674a16d15f..3bee280fd91 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -6466,7 +6466,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>    /* Make sure that the target supports an appropriate internal
>       function for the gather/scatter operation.  */
>    gather_scatter_info gs_info;
> -  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
> +  int elsval;
> +  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info, &elsval)
>        || gs_info.ifn == IFN_LAST)
>      return NULL;
>  
> @@ -6489,20 +6490,26 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>    tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
>                                               gs_info.offset, stmt_info);
>  
> +  tree vec_els = NULL_TREE;
>    /* Build the new pattern statement.  */
>    tree scale = size_int (gs_info.scale);
>    gcall *pattern_stmt;
> +  tree load_lhs;
>    if (DR_IS_READ (dr))
>      {
>        tree zero = build_zero_cst (gs_info.element_type);
>        if (mask != NULL)
> -     pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
> -                                                offset, scale, zero, mask);
> +     {
> +       vec_els = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
> +       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
> +                                                  offset, scale, zero, mask,
> +                                                  vec_els);
> +     }
>        else
>       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
>                                                  offset, scale, zero);
> -      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
> -      gimple_call_set_lhs (pattern_stmt, load_lhs);
> +      load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
> +      gimple_set_lhs (pattern_stmt, load_lhs);
>      }
>    else
>      {
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 5f0d9e51c32..22448ec9917 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -507,13 +507,13 @@ static const int cond_expr_maps[3][5] = {
>  };
>  static const int arg0_map[] = { 1, 0 };
>  static const int arg1_map[] = { 1, 1 };
> -static const int arg2_map[] = { 1, 2 };
> -static const int arg1_arg4_map[] = { 2, 1, 4 };
> +static const int arg2_arg3_map[] = { 2, 2, 3 };
> +static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
>  static const int arg3_arg2_map[] = { 2, 3, 2 };
>  static const int op1_op0_map[] = { 2, 1, 0 };
>  static const int off_map[] = { 1, -3 };
>  static const int off_op0_map[] = { 2, -3, 0 };
> -static const int off_arg2_map[] = { 2, -3, 2 };
> +static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 };
>  static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
>  static const int mask_call_maps[6][7] = {
>    { 1, 1, },
> @@ -560,14 +560,14 @@ vect_get_operand_map (const gimple *stmt, bool 
> gather_scatter_p = false,
>       switch (gimple_call_internal_fn (call))
>         {
>         case IFN_MASK_LOAD:
> -         return gather_scatter_p ? off_arg2_map : arg2_map;
> +         return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
>  
>         case IFN_GATHER_LOAD:
>           return arg1_map;
>  
>         case IFN_MASK_GATHER_LOAD:
>         case IFN_MASK_LEN_GATHER_LOAD:
> -         return arg1_arg4_map;
> +         return arg1_arg4_arg5_map;
>  
>         case IFN_MASK_STORE:
>           return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
> @@ -6818,6 +6818,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, 
> slp_tree node,
>         tree vector_type = SLP_TREE_VECTYPE (child);
>         if (!vector_type)
>           {
> +           /* Masked loads can have an undefined (default SSA definition)
> +              else operand.  We do not need to cost it.  */
> +           vec<tree> ops = SLP_TREE_SCALAR_OPS (child);
> +           if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
> +                == load_vec_info_type)
> +               && ((ops.length () &&
> +                    TREE_CODE (ops[0]) == SSA_NAME
> +                    && SSA_NAME_IS_DEFAULT_DEF (ops[0])
> +                    && VAR_P (SSA_NAME_VAR (ops[0])))
> +                   || SLP_TREE_DEF_TYPE (child) == vect_constant_def))
> +             continue;
> +
>             /* For shifts with a scalar argument we don't need
>                to cost or code-generate anything.
>                ???  Represent this more explicitely.  */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 20cae83e820..9e721c72ddf 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "regs.h"
>  #include "attribs.h"
>  #include "optabs-libfuncs.h"
> +#include "tree-dfa.h"
>  
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -467,6 +468,10 @@ exist_non_indexing_operands_for_use_p (tree use, 
> stmt_vec_info stmt_info)
>         if (mask_index >= 0
>             && use == gimple_call_arg (call, mask_index))
>           return true;
> +       int els_index = internal_fn_else_index (ifn);
> +       if (els_index >= 0
> +           && use == gimple_call_arg (call, els_index))
> +         return true;
>         int stored_value_index = internal_fn_stored_value_index (ifn);
>         if (stored_value_index >= 0
>             && use == gimple_call_arg (call, stored_value_index))
> @@ -1278,7 +1283,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, 
> stmt_vec_info stmt_vinfo,
>       vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
>  
>        gcc_assert (vector_type);
> -      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
> +      /* A masked load can have a default SSA definition as else operand.
> +      We should "vectorize" this instead of creating a duplicate from the
> +      scalar default.  */
> +      tree vop;
> +      if (TREE_CODE (op) == SSA_NAME
> +       && SSA_NAME_IS_DEFAULT_DEF (op)
> +       && VAR_P (SSA_NAME_VAR (op)))
> +     vop = get_or_create_ssa_default_def (cfun,
> +                                          create_tmp_var (vector_type));
> +      else
> +     vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
>        while (ncopies--)
>       vec_oprnds->quick_push (vop);
>      }
> @@ -1500,7 +1515,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>                                     vect_memory_access_type
>                                     memory_access_type,
>                                     gather_scatter_info *gs_info,
> -                                   tree scalar_mask)
> +                                   tree scalar_mask,
> +                                   int *elsval = nullptr)
>  {
>    /* Invariant loads need no special support.  */
>    if (memory_access_type == VMAT_INVARIANT)
> @@ -1519,7 +1535,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>    if (memory_access_type == VMAT_LOAD_STORE_LANES)
>      {
>        internal_fn ifn
> -     = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
> +     = (is_load ? vect_load_lanes_supported (vectype, group_size, true,
> +                                             elsval)
>                  : vect_store_lanes_supported (vectype, group_size, true));
>        if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
>       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
> @@ -1549,7 +1566,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>        if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
>                                                 gs_info->memory_type,
>                                                 gs_info->offset_vectype,
> -                                               gs_info->scale))
> +                                               gs_info->scale,
> +                                               elsval))
>       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
>        else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
>                                                      gs_info->memory_type,
> @@ -1608,7 +1626,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>    machine_mode mask_mode;
>    machine_mode vmode;
>    bool using_partial_vectors_p = false;
> -  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
> +  if (get_len_load_store_mode
> +      (vecmode, is_load, nullptr, elsval).exists (&vmode))
>      {
>        nvectors = group_memory_nvectors (group_size * vf, nunits);
>        unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE 
> (vecmode);
> @@ -1616,7 +1635,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>        using_partial_vectors_p = true;
>      }
>    else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
> -        && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
> +        && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
> +                                      elsval))
>      {
>        nvectors = group_memory_nvectors (group_size * vf, nunits);
>        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, 
> scalar_mask);
> @@ -1678,7 +1698,8 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree 
> mask_type, tree loop_mask,
>  static bool
>  vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
>                                    loop_vec_info loop_vinfo, bool masked_p,
> -                                  gather_scatter_info *gs_info)
> +                                  gather_scatter_info *gs_info,
> +                                  int *elsval)
>  {
>    dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
>    data_reference *dr = dr_info->dr;
> @@ -1735,7 +1756,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info 
> stmt_info,
>        tree memory_type = TREE_TYPE (DR_REF (dr));
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                    vectype, memory_type, offset_type, scale,
> -                                  &gs_info->ifn, &gs_info->offset_vectype)
> +                                  &gs_info->ifn, &gs_info->offset_vectype,
> +                                  elsval)
>         || gs_info->ifn == IFN_LAST)
>       continue;
>  
> @@ -1768,12 +1790,13 @@ vect_truncate_gather_scatter_offset (stmt_vec_info 
> stmt_info,
>  static bool
>  vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
>                                   loop_vec_info loop_vinfo, bool masked_p,
> -                                 gather_scatter_info *gs_info)
> +                                 gather_scatter_info *gs_info,
> +                                 int *elsval)
>  {
> -  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
> +  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsval)
>        || gs_info->ifn == IFN_LAST)
>      return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
> -                                             masked_p, gs_info);
> +                                             masked_p, gs_info, elsval);
>  
>    tree old_offset_type = TREE_TYPE (gs_info->offset);
>    tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
> @@ -1986,7 +2009,8 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>                          dr_alignment_support *alignment_support_scheme,
>                          int *misalignment,
>                          gather_scatter_info *gs_info,
> -                        internal_fn *lanes_ifn)
> +                        internal_fn *lanes_ifn,
> +                        int *elsval)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2220,7 +2244,8 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>             /* Otherwise try using LOAD/STORE_LANES.  */
>             *lanes_ifn
>               = vls_type == VLS_LOAD
> -                 ? vect_load_lanes_supported (vectype, group_size, masked_p)
> +                 ? vect_load_lanes_supported (vectype, group_size, masked_p,
> +                                              elsval)
>                   : vect_store_lanes_supported (vectype, group_size,
>                                                 masked_p);
>             if (*lanes_ifn != IFN_LAST)
> @@ -2253,7 +2278,7 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>         && single_element_p
>         && loop_vinfo
>         && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                              masked_p, gs_info))
> +                                              masked_p, gs_info, elsval))
>       *memory_access_type = VMAT_GATHER_SCATTER;
>      }
>  
> @@ -2328,7 +2353,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>                    dr_alignment_support *alignment_support_scheme,
>                    int *misalignment,
>                    gather_scatter_info *gs_info,
> -                  internal_fn *lanes_ifn)
> +                  internal_fn *lanes_ifn,
> +                  int *elsval = nullptr)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> @@ -2337,7 +2363,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>      {
>        *memory_access_type = VMAT_GATHER_SCATTER;
> -      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
> +      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> +                                   elsval))
>       gcc_unreachable ();
>        /* When using internal functions, we rely on pattern recognition
>        to convert the type of the offset to the type that the target
> @@ -2391,7 +2418,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>                                     masked_p,
>                                     vls_type, memory_access_type, poffset,
>                                     alignment_support_scheme,
> -                                   misalignment, gs_info, lanes_ifn))
> +                                   misalignment, gs_info, lanes_ifn,
> +                                   elsval))
>       return false;
>      }
>    else if (STMT_VINFO_STRIDED_P (stmt_info))
> @@ -2399,7 +2427,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>        gcc_assert (!slp_node);
>        if (loop_vinfo
>         && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                              masked_p, gs_info))
> +                                              masked_p, gs_info, elsval))
>       *memory_access_type = VMAT_GATHER_SCATTER;
>        else
>       *memory_access_type = VMAT_ELEMENTWISE;
> @@ -2667,6 +2695,52 @@ vect_build_zero_merge_argument (vec_info *vinfo,
>    return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
>  }
>  
> +/* Return the supported else value for a masked load as a tree,
> +   based on ELSVAL, which is one of the MASK_LOAD_ELSE_* values.
> +   TYPE specifies the type of the returned else value.  */
> +
> +tree
> +vect_get_mask_load_else (int elsval, tree type)
> +{
> +  tree els;
> +  if (elsval == MASK_LOAD_ELSE_UNDEFINED)
> +    {
> +      tree tmp = create_tmp_var (type);
> +      /* No need to warn about anything.  */
> +      TREE_NO_WARNING (tmp) = 1;
> +      els = get_or_create_ssa_default_def (cfun, tmp);
> +    }
> +  else if (elsval == MASK_LOAD_ELSE_M1)
> +    els = build_minus_one_cst (type);
> +  else if (elsval == MASK_LOAD_ELSE_ZERO)
> +    els = build_zero_cst (type);
> +  else
> +    __builtin_unreachable ();
> +
> +  return els;
> +}
> +
> +/* Return the MASK_LOAD_ELSE_* integer value that the tree else
> +   operand ELS represents.  This performs the inverse of
> +   vect_get_mask_load_else.  Refer to vect_check_gather_scatter
> +   for its usage rationale.  */
> +
> +int
> +vect_get_else_val_from_tree (tree els)
> +{
> +  if (TREE_CODE (els) == SSA_NAME
> +      && SSA_NAME_IS_DEFAULT_DEF (els))

&& TREE_CODE (SSA_NAME_VAR (els)) == VAR_DECL

> +    return MASK_LOAD_ELSE_UNDEFINED;
> +  else

else if?

> +    {
> +      if (zerop (els))
> +     return MASK_LOAD_ELSE_ZERO;
> +      else if (integer_minus_onep (els))
> +     return MASK_LOAD_ELSE_M1;
> +      else
> +     return MASK_LOAD_ELSE_NONE;

I think this should be gcc_unreachable () instead.  We shouldn't
answer NONE when passing in 2.

> +    }
> +}
> +
>  /* Build a gather load call while vectorizing STMT_INFO.  Insert new
>     instructions before GSI and add them to VEC_STMT.  GS_INFO describes
>     the gather load operation.  If the load is conditional, MASK is the
> @@ -2748,8 +2822,20 @@ vect_build_one_gather_load_call (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>      }
>  
>    tree scale = build_int_cst (scaletype, gs_info->scale);
> -  gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> -                                     mask_op, scale);
> +  gimple *new_stmt;
> +
> +  /* ??? Rather than trying to query a builtin's predicates
> +     in a cumbersome way go with a zero else value.
> +     As this vectorizer path is x86 only and x86 gather loads
> +     always zero-fill masked elements a hard-coded zero else value
> +     seems reasonable.  */

But did you adjust the x86 builtin decls?  I'd just leave those alone
and have an implicit zero else value here - what's the point in
making it explicit but hardcoded?

The rest looks OK to me.

> +  tree vec_els = build_zero_cst (vectype);
> +  if (!mask)
> +    new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> +                               mask_op, scale);
> +  else
> +    new_stmt = gimple_build_call (gs_info->decl, 6, src_op, ptr, op,
> +                               mask_op, vec_els, scale);
>  
>    if (!useless_type_conversion_p (vectype, rettype))
>      {
> @@ -9832,6 +9918,7 @@ vectorizable_load (vec_info *vinfo,
>    gather_scatter_info gs_info;
>    tree ref_type;
>    enum vect_def_type mask_dt = vect_unknown_def_type;
> +  enum vect_def_type els_dt = vect_unknown_def_type;
>  
>    if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>      return false;
> @@ -9844,8 +9931,12 @@ vectorizable_load (vec_info *vinfo,
>      return false;
>  
>    tree mask = NULL_TREE, mask_vectype = NULL_TREE;
> +  tree els = NULL_TREE; tree els_vectype = NULL_TREE;
> +
>    int mask_index = -1;
> +  int els_index = -1;
>    slp_tree slp_op = NULL;
> +  slp_tree els_op = NULL;
>    if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
>      {
>        scalar_dest = gimple_assign_lhs (assign);
> @@ -9885,6 +9976,15 @@ vectorizable_load (vec_info *vinfo,
>         && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
>                                     &mask, &slp_op, &mask_dt, &mask_vectype))
>       return false;
> +
> +      els_index = internal_fn_else_index (ifn);
> +      if (els_index >= 0 && slp_node)
> +     els_index = vect_slp_child_index_for_operand
> +       (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
> +      if (els_index >= 0
> +       && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index,
> +                               &els, &els_op, &els_dt, &els_vectype))
> +     return false;
>      }
>  
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> @@ -10027,10 +10127,11 @@ vectorizable_load (vec_info *vinfo,
>    int misalignment;
>    poly_int64 poffset;
>    internal_fn lanes_ifn;
> +  int elsval;
>    if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, 
> VLS_LOAD,
>                           ncopies, &memory_access_type, &poffset,
>                           &alignment_support_scheme, &misalignment, &gs_info,
> -                         &lanes_ifn))
> +                         &lanes_ifn, &elsval))
>      return false;
>  
>    if (mask)
> @@ -10040,7 +10141,8 @@ vectorizable_load (vec_info *vinfo,
>         machine_mode vec_mode = TYPE_MODE (vectype);
>         if (!VECTOR_MODE_P (vec_mode)
>             || !can_vec_mask_load_store_p (vec_mode,
> -                                          TYPE_MODE (mask_vectype), true))
> +                                          TYPE_MODE (mask_vectype),
> +                                          true, NULL, &elsval))
>           return false;
>       }
>        else if (memory_access_type != VMAT_LOAD_STORE_LANES
> @@ -10771,6 +10873,7 @@ vectorizable_load (vec_info *vinfo,
>      }
>  
>    tree vec_mask = NULL_TREE;
> +  tree vec_els = NULL_TREE;
>    if (memory_access_type == VMAT_LOAD_STORE_LANES)
>      {
>        gcc_assert (alignment_support_scheme == dr_aligned
> @@ -10860,6 +10963,9 @@ vectorizable_load (vec_info *vinfo,
>               }
>           }
>  
> +       if (loop_masks || final_mask)
> +         vec_els = vect_get_mask_load_else (elsval, vectype);
> +
>         gcall *call;
>         if (final_len && final_mask)
>           {
> @@ -10868,9 +10974,10 @@ vectorizable_load (vec_info *vinfo,
>                                                   VEC_MASK, LEN, BIAS).  */
>             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
>             tree alias_ptr = build_int_cst (ref_type, align);
> -           call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
> +           call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
>                                                dataref_ptr, alias_ptr,
> -                                              final_mask, final_len, bias);
> +                                              final_mask, vec_els,
> +                                              final_len, bias);
>           }
>         else if (final_mask)
>           {
> @@ -10879,9 +10986,9 @@ vectorizable_load (vec_info *vinfo,
>                                               VEC_MASK).  */
>             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
>             tree alias_ptr = build_int_cst (ref_type, align);
> -           call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
> +           call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
>                                                dataref_ptr, alias_ptr,
> -                                              final_mask);
> +                                              final_mask, vec_els);
>           }
>         else
>           {
> @@ -11023,17 +11130,27 @@ vectorizable_load (vec_info *vinfo,
>                       }
>                   }
>  
> +               if (final_mask)
> +                 vec_els = vect_get_mask_load_else (elsval, vectype);
> +
>                 gcall *call;
>                 if (final_len && final_mask)
> -                 call
> -                   = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
> -                                                 dataref_ptr, vec_offset,
> -                                                 scale, zero, final_mask,
> -                                                 final_len, bias);
> +                 {
> +                   call
> +                     = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
> +                                                   8, dataref_ptr,
> +                                                   vec_offset, scale, zero,
> +                                                   final_mask, vec_els,
> +                                                   final_len, bias);
> +                 }
>                 else if (final_mask)
> -                 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
> -                                                    dataref_ptr, vec_offset,
> -                                                    scale, zero, final_mask);
> +                 {
> +                   call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
> +                                                      6, dataref_ptr,
> +                                                      vec_offset, scale,
> +                                                      zero, final_mask,
> +                                                      vec_els);
> +                 }
>                 else
>                   call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
>                                                      dataref_ptr, vec_offset,
> @@ -11347,6 +11464,7 @@ vectorizable_load (vec_info *vinfo,
>         tree final_mask = NULL_TREE;
>         tree final_len = NULL_TREE;
>         tree bias = NULL_TREE;
> +
>         if (!costing_p)
>           {
>             if (mask)
> @@ -11399,7 +11517,8 @@ vectorizable_load (vec_info *vinfo,
>               if (loop_lens)
>                 {
>                   opt_machine_mode new_ovmode
> -                   = get_len_load_store_mode (vmode, true, &partial_ifn);
> +                   = get_len_load_store_mode (vmode, true, &partial_ifn,
> +                                              &elsval);
>                   new_vmode = new_ovmode.require ();
>                   unsigned factor
>                     = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> @@ -11411,7 +11530,7 @@ vectorizable_load (vec_info *vinfo,
>                 {
>                   if (!can_vec_mask_load_store_p (
>                         vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
> -                       &partial_ifn))
> +                       &partial_ifn, &elsval))
>                     gcc_unreachable ();
>                 }
>  
> @@ -11439,19 +11558,27 @@ vectorizable_load (vec_info *vinfo,
>                   bias = build_int_cst (intQI_type_node, biasval);
>                 }
>  
> +             tree vec_els;
> +             if (final_len || final_mask)
> +               vec_els = vect_get_mask_load_else (elsval, vectype);
> +
>               if (final_len)
>                 {
>                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
>                   gcall *call;
>                   if (partial_ifn == IFN_MASK_LEN_LOAD)
> -                   call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
> -                                                      dataref_ptr, ptr,
> -                                                      final_mask, final_len,
> -                                                      bias);
> +                   {
> +                     call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
> +                                                        6, dataref_ptr, ptr,
> +                                                        final_mask, vec_els,
> +                                                        final_len, bias);
> +                   }
>                   else
> -                   call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
> -                                                      dataref_ptr, ptr,
> -                                                      final_len, bias);
> +                   {
> +                     call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
> +                                                        dataref_ptr, ptr,
> +                                                        final_len, bias);
> +                   }
>                   gimple_call_set_nothrow (call, true);
>                   new_stmt = call;
>                   data_ref = NULL_TREE;
> @@ -11474,9 +11601,10 @@ vectorizable_load (vec_info *vinfo,
>               else if (final_mask)
>                 {
>                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
> -                 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
> +                 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
>                                                             dataref_ptr, ptr,
> -                                                           final_mask);
> +                                                           final_mask,
> +                                                           vec_els);
>                   gimple_call_set_nothrow (call, true);
>                   new_stmt = call;
>                   data_ref = NULL_TREE;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index df6c8ada2f7..e14b3f278b4 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2399,9 +2399,11 @@ extern bool vect_slp_analyze_instance_alignment 
> (vec_info *, slp_instance);
>  extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
>  extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
>  extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
> -                                   tree, int, internal_fn *, tree *);
> +                                   tree, int, internal_fn *, tree *,
> +                                   int * = nullptr);
>  extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
> -                                    gather_scatter_info *);
> +                                    gather_scatter_info *,
> +                                    int * = nullptr);
>  extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
>                                                vec<data_reference_p> *,
>                                                vec<int> *, int);
> @@ -2419,7 +2421,8 @@ extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
>  extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, 
> bool);
>  extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
> -extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, 
> bool);
> +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
> +                                           bool, int * = nullptr);
>  extern void vect_permute_store_chain (vec_info *, vec<tree> &,
>                                     unsigned int, stmt_vec_info,
>                                     gimple_stmt_iterator *, vec<tree> *);
> @@ -2560,6 +2563,8 @@ extern int vect_slp_child_index_for_operand (const 
> gimple *, int op, bool);
>  
>  extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
>                             gimple_stmt_iterator *);
> +extern tree vect_get_mask_load_else (int, tree);
> +extern int vect_get_else_val_from_tree (tree els);
>  
>  /* In tree-vect-patterns.cc.  */
>  extern void
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to