On Tue, 17 Aug 2021, Richard Sandiford wrote:

> Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > This adds a fallback to the masked_ variants for gather_load
> > and scatter_store if the latter are not available.
> >
> > Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> 
> LGTM FWIW.  I don't know the history behind the TREE_CODE (*mask) != SSA_NAME
> check.

I've traced it back to commit 045c12782cc8c, but that commit just moved it as well.

> I guess we could probably remove the unmasked SVE optabs with this.

Yeah, it will make my life simpler in not needing to write expanders
for something the CPU cannot do on x86.

The patch tested OK on x86_64-unknown-linux-gnu so I pushed it now.

Richard.

> Thanks,
> Richard
> 
> >
> > 2021-08-17  Richard Biener  <rguent...@suse.de>
> >
> >     * optabs-query.c (supports_vec_gather_load_p): Also check
> >     for masked optabs.
> >     (supports_vec_scatter_store_p): Likewise.
> >     * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Fall
> >     back to masked variants if non-masked are not supported.
> >     * tree-vect-patterns.c (vect_recog_gather_scatter_pattern):
> >     When we need to use masked gather/scatter but do not have
> >     a mask, set up a constant true one.
> >     * tree-vect-stmts.c (vect_check_scalar_mask): Also allow
> >     non-SSA_NAME masks.
> > ---
> >  gcc/optabs-query.c        |  6 ++++--
> >  gcc/tree-vect-data-refs.c | 22 +++++++++++++++++++---
> >  gcc/tree-vect-patterns.c  |  7 +++++--
> >  gcc/tree-vect-stmts.c     |  8 --------
> >  4 files changed, 28 insertions(+), 15 deletions(-)
> >
> > diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> > index 05ee5f517da..a6dd0fed610 100644
> > --- a/gcc/optabs-query.c
> > +++ b/gcc/optabs-query.c
> > @@ -740,7 +740,8 @@ supports_vec_gather_load_p ()
> >    this_fn_optabs->supports_vec_gather_load_cached = true;
> >  
> >    this_fn_optabs->supports_vec_gather_load
> > -    = supports_vec_convert_optab_p (gather_load_optab);
> > +    = (supports_vec_convert_optab_p (gather_load_optab)
> > +       || supports_vec_convert_optab_p (mask_gather_load_optab));
> >  
> >    return this_fn_optabs->supports_vec_gather_load;
> >  }
> > @@ -757,7 +758,8 @@ supports_vec_scatter_store_p ()
> >    this_fn_optabs->supports_vec_scatter_store_cached = true;
> >  
> >    this_fn_optabs->supports_vec_scatter_store
> > -    = supports_vec_convert_optab_p (scatter_store_optab);
> > +    = (supports_vec_convert_optab_p (scatter_store_optab)
> > +       || supports_vec_convert_optab_p (mask_scatter_store_optab));
> >  
> >    return this_fn_optabs->supports_vec_scatter_store;
> >  }
> > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> > index c4c14d74065..97745a830a2 100644
> > --- a/gcc/tree-vect-data-refs.c
> > +++ b/gcc/tree-vect-data-refs.c
> > @@ -3735,11 +3735,17 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
> >      return false;
> >  
> >    /* Work out which function we need.  */
> > -  internal_fn ifn;
> > +  internal_fn ifn, alt_ifn;
> >    if (read_p)
> > -    ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> > +    {
> > +      ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
> > +      alt_ifn = IFN_MASK_GATHER_LOAD;
> > +    }
> >    else
> > -    ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> > +    {
> > +      ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
> > +      alt_ifn = IFN_MASK_SCATTER_STORE;
> > +    }
> >  
> >    for (;;)
> >      {
> > @@ -3755,6 +3761,16 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
> >       *offset_vectype_out = offset_vectype;
> >       return true;
> >     }
> > +      else if (!masked_p
> > +          && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
> > +                                                     memory_type,
> > +                                                     offset_vectype,
> > +                                                     scale))
> > +   {
> > +     *ifn_out = alt_ifn;
> > +     *offset_vectype_out = offset_vectype;
> > +     return true;
> > +   }
> >  
> >        if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
> >       && TYPE_PRECISION (offset_type) >= element_bits)
> > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> > index 25de97bd9b0..899734005ce 100644
> > --- a/gcc/tree-vect-patterns.c
> > +++ b/gcc/tree-vect-patterns.c
> > @@ -4820,6 +4820,9 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
> >    if (mask)
> >      mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
> >                                       loop_vinfo);
> > +  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
> > +      || gs_info.ifn == IFN_MASK_GATHER_LOAD)
> > +    mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
> >  
> >    /* Get the invariant base and non-invariant offset, converting the
> >       latter to the same width as the vector elements.  */
> > @@ -4847,11 +4850,11 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
> >      {
> >        tree rhs = vect_get_store_rhs (stmt_info);
> >        if (mask != NULL)
> > -   pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
> > +   pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
> >                                                base, offset, scale, rhs,
> >                                                mask);
> >        else
> > -   pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
> > +   pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
> >                                                base, offset, scale, rhs);
> >      }
> >    gimple_call_set_nothrow (pattern_stmt, true);
> > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> > index e356056be93..59100965d5e 100644
> > --- a/gcc/tree-vect-stmts.c
> > +++ b/gcc/tree-vect-stmts.c
> > @@ -2510,14 +2510,6 @@ vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
> >        return false;
> >      }
> >  
> > -  if (TREE_CODE (*mask) != SSA_NAME)
> > -    {
> > -      if (dump_enabled_p ())
> > -   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > -                    "mask argument is not an SSA name.\n");
> > -      return false;
> > -    }
> > -
> >    /* If the caller is not prepared for adjusting an external/constant
> >       SLP mask vector type fail.  */
> >    if (slp_node
> 

Reply via email to