On Wed, 5 Nov 2025, Christopher Bazley wrote:

> 
> On 28/10/2025 13:29, Richard Biener wrote:
> > On Tue, 28 Oct 2025, Christopher Bazley wrote:
> >
> > +/* Materialize length number INDEX for a group of scalar stmts in SLP_NODE that
> > +   operate on NVECTORS vectors of type VECTYPE, where 0 <= INDEX < NVECTORS.  A
> > +   length limit is only required for the tail, therefore NULL_TREE is returned
> > +   for every value of INDEX except the last; otherwise, return FACTOR
> > +   multiplied by the number of elements that should be processed.  */
> > +
> > +tree
> > +vect_slp_get_bb_len (slp_tree slp_node, unsigned int nvectors, tree vectype,
> > +                  unsigned int index, unsigned int factor)
> > +{
> > +  gcc_checking_assert (SLP_TREE_CAN_USE_LEN_P (slp_node));
> > +
> > +  /* Only the last vector can be a partial vector.  */
> > +  if (index < nvectors - 1)
> > +    return NULL_TREE;
> > +
> > +  /* vect_get_num_copies only allows a partial vector if it is the only
> > +     vector.  */
> > +  if (nvectors > 1)
> > +    return NULL_TREE;
> > +
> > +  gcc_checking_assert (nvectors == 1);
> > +
> > +  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> > +  unsigned int group_size = SLP_TREE_LANES (slp_node);
> > +
> > +  /* A single vector can be a full vector, in which case no length limit is
> > +     needed.  */
> > +  if (known_eq (nunits, group_size))
> > +    return NULL_TREE;
> > +
> > +  /* Return the scaled length of a single partial vector.  */
> > +  gcc_checking_assert (known_lt (group_size, nunits));
> > +  return size_int (group_size * factor);
> > +}
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index 3115c610736..5ec65b2b2de 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -1408,7 +1408,9 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
> >   /* Record that a complete set of masks associated with VINFO would need to
> >      contain a sequence of NVECTORS masks that each control a vector of type
> >      VECTYPE.  If SCALAR_MASK is nonnull, the fully-masked loop would AND
> > -   these vector masks with the vector version of SCALAR_MASK.  */
> > +   these vector masks with the vector version of SCALAR_MASK.  Alternatively,
> > +   if doing basic block vectorization, record that an equivalent mask would be
> > +   required to vectorize SLP_NODE.  */
> >   static void
> >   vect_record_mask (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
> >                 tree vectype, tree scalar_mask)
> > @@ -1418,7 +1420,10 @@ vect_record_mask (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
> >       vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo), nvectors,
> >                          vectype, scalar_mask);
> >     else
> > -    (void) slp_node; // FORNOW
> > +    {
> > +      gcc_checking_assert (!SLP_TREE_CAN_USE_LEN_P (slp_node));
> > +      SLP_TREE_CAN_USE_MASK_P (slp_node) = true;
> > +    }
> >   }
> >
> >   /* Given a complete set of masks associated with VINFO, extract mask number
> > @@ -1436,16 +1441,15 @@ vect_get_mask (vec_info *vinfo, slp_tree slp_node, gimple_stmt_iterator *gsi,
> >       return vect_get_loop_mask (loop_vinfo, gsi, &LOOP_VINFO_MASKS (loop_vinfo),
> >                              nvectors, vectype, index);
> >     else
> > -    {
> > -      (void) slp_node; // FORNOW
> > -      return NULL_TREE;
> > -    }
> > +    return vect_slp_get_bb_mask (slp_node, gsi, nvectors, vectype, index);
> >   }
> >
> >   /* Record that a complete set of lengths associated with VINFO would need to
> >      contain a sequence of NVECTORS lengths for controlling an operation on
> >      VECTYPE.  The operation splits each element of VECTYPE into FACTOR separate
> > -   subelements, measuring the length as a number of these subelements.  */
> > +   subelements, measuring the length as a number of these subelements.
> > +   Alternatively, if doing basic block vectorization, record that an equivalent
> > +   length would be required to vectorize SLP_NODE.  */
> >   static void
> >   vect_record_len (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
> >                tree vectype, unsigned int factor)
> > @@ -1455,7 +1459,10 @@ vect_record_len (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
> >       vect_record_loop_len (loop_vinfo, &LOOP_VINFO_LENS (loop_vinfo), nvectors,
> >                         vectype, factor);
> >     else
> > -    (void) slp_node; // FORNOW
> > +    {
> > +      gcc_checking_assert (!SLP_TREE_CAN_USE_MASK_P (slp_node));
> > +      SLP_TREE_CAN_USE_LEN_P (slp_node) = true;
> > +    }
> >   }
> >
> >   /* Given a complete set of lengths associated with VINFO, extract length number
> > @@ -1476,10 +1483,7 @@ vect_get_len (vec_info *vinfo, slp_tree slp_node, gimple_stmt_iterator *gsi,
> >       return vect_get_loop_len (loop_vinfo, gsi, &LOOP_VINFO_LENS (loop_vinfo),
> >                             nvectors, vectype, index, factor);
> >     else
> > -    {
> > -      (void) slp_node; // FORNOW
> > -      return NULL_TREE;
> > -    }
> > +    return vect_slp_get_bb_len (slp_node, nvectors, vectype, index, factor);
> >   }
> >
> >   static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
> > @@ -14252,24 +14256,35 @@ supportable_indirect_convert_operation (code_helper code,
> >      mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
> >      Add the statements to SEQ.  */
> >
> > +void
> > +vect_gen_while_ssa_name (gimple_seq *seq, tree mask_type, tree start_index,
> > +                      tree end_index, tree ssa_name)
> > +{
> > +  tree cmp_type = TREE_TYPE (start_index);
> > +  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT, cmp_type,
> > +                                                    mask_type,
> > +                                                    OPTIMIZE_FOR_SPEED));
> > +  gcall *call
> > +    = gimple_build_call_internal (IFN_WHILE_ULT, 3, start_index, end_index,
> > +                               build_zero_cst (mask_type));
> >
> > That's quite restrictive; for constant_p nunits you should be able to
> > create a VECTOR_CST.  How do you ensure that the actual vector length
> > is big enough, btw?

CCing the list on my reply.

> I think that an existing function named fold_while_ult already optimises
> all IFN_WHILE_ULT usage that can be optimised safely, so I'm reluctant
> to duplicate a version of that logic here.

x86 does not implement IFN_WHILE_ULT; that's what I wanted to say.
I do not remember seeing any check that ensures this is only called
for targets that do.
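
For the constant-nunits case, something along the following lines would
avoid IFN_WHILE_ULT entirely (an untested sketch; n_active stands in
for whatever count of live lanes you have at that point):

  unsigned HOST_WIDE_INT const_nunits;
  if (TYPE_VECTOR_SUBPARTS (mask_type).is_constant (&const_nunits))
    {
      /* Build the mask as a VECTOR_CST: all-ones for the first
         n_active lanes, zero for the rest.  No target support for
         IFN_WHILE_ULT is needed on this path.  */
      tree_vector_builder builder (mask_type, const_nunits, 1);
      for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; ++i)
        builder.quick_push (i < n_active
                            ? build_all_ones_cst (TREE_TYPE (mask_type))
                            : build_zero_cst (TREE_TYPE (mask_type)));
      return builder.build ();
    }

The IFN_WHILE_ULT path would then only be needed for variable-length
vectors.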

> If you're asking how I ensure mask_type is big enough for the mask, an
> existing function named truth_type_for picks an appropriate type for a
> given vectype. I don't know exactly how that works.
> 
> If you're asking how I ensure vectype is big enough for the data to be
> masked, an existing function named get_vectype_for_scalar_type picks an
> appropriate type for a given number of elements of a given scalar type.
> I modified that function so that it does not attempt to substitute a
> shorter vector type unless the natural vector type (for a given
> iteration of the outer loop in vect_slp_region) might be too long to
> store the whole group and the vectoriser is unable to generate masks to
> handle the unknown number of excess lanes. This prevents the group from
> being split.

I think instead of modifying this function you should enforce this
constraint where we currently reject BB vectorization and split
groups when we think unrolling is required.
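
In rough pseudocode (untested; the exact spot is wherever the BB
vectorizer currently decides that a group needs splitting, and
can_mask_excess_lanes_p is a placeholder for whatever predicate you
end up with):

  /* Only reject or split the group if the tail cannot be masked.  */
  if (!multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype))
      && !can_mask_excess_lanes_p (bb_vinfo, vectype))
    {
      /* Existing behaviour: reject BB vectorization or split
         the group.  */
    }
  /* Otherwise keep the group whole and mask the excess lanes.  */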

Richard.

> --
> Christopher Bazley
> Staff Software Engineer, GNU Tools Team.
> Arm Ltd, 110 Fulbourn Road, Cambridge, CB1 9NJ, UK.
> http://www.arm.com/
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
