To decide whether to create a new SLP instance for BB SLP,
vect_analyze_slp_instance needs to know the minimum number of
vector elements (nunits) of any vector type used in the SLP tree,
which must not be less than the group size (otherwise "unrolling"
is required).  All uses of max_nunits are therefore replaced with
a new struct, slp_tree_nunits, that encapsulates both the minimum
and the maximum.
---
 gcc/tree-vect-slp.cc  | 172 ++++++++++++++++++++++--------------------
 gcc/tree-vectorizer.h |  47 +++++++++++-
 2 files changed, 137 insertions(+), 82 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e02b3379bb4..3d78f91c93a 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -130,7 +130,7 @@ _slp_tree::_slp_tree ()
   this->cycle_info.reduc_idx = -1;
   SLP_TREE_REF_COUNT (this) = 1;
   this->failed = NULL;
-  this->max_nunits = 1;
+  this->nunits = {UINT64_MAX, 1};
   this->lanes = 0;
   SLP_TREE_TYPE (this) = undef_vec_info_type;
   this->data = NULL;
@@ -1050,14 +1050,14 @@ compatible_calls_p (gcall *call1, gcall *call2, bool allow_two_operators)
 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
    caller's attempt to find the vector type in STMT_INFO with the narrowest
    element type.  Return true if VECTYPE is nonnull and if it is valid
-   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
-   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
+   for STMT_INFO.  When returning true, update NUNITS to reflect the
+   number of units in VECTYPE.  GROUP_SIZE and NUNITS are as for
    vect_build_slp_tree.  */
 
 static bool
-vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
-                       unsigned int group_size,
-                       tree vectype, poly_uint64 *max_nunits)
+vect_record_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
+                   unsigned int group_size, tree vectype,
+                   slp_tree_nunits *nunits)
 {
   if (!vectype)
     {
@@ -1070,7 +1070,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
     }
 
   /* If populating the vector type requires unrolling then fail
-     before adjusting *max_nunits for basic-block vectorization.  */
+     before adjusting *nunits for basic-block vectorization.  */
   if (is_a <bb_vec_info> (vinfo)
       && !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
     {
@@ -1083,7 +1083,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
     }
 
   /* In case of multiple types we need to detect the smallest type.  */
-  vect_update_max_nunits (max_nunits, vectype);
+  vect_update_nunits (nunits, vectype);
   return true;
 }
 
@@ -1104,7 +1104,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
 static bool
 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
-                      poly_uint64 *max_nunits, bool *matches,
+                      slp_tree_nunits *nunits, bool *matches,
                       bool *two_operators, tree *node_vectype)
 {
   unsigned int i;
@@ -1144,8 +1144,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
      as if nunits was not an issue.  This allows splitting of groups
      to happen.  */
   if (nunits_vectype
-      && !vect_record_max_nunits (vinfo, first_stmt_info, group_size,
-                                 nunits_vectype, max_nunits))
+      && !vect_record_nunits (vinfo, first_stmt_info, group_size,
+                             nunits_vectype, nunits))
     {
       gcc_assert (is_a <bb_vec_info> (vinfo));
       maybe_soft_fail = true;
@@ -1809,14 +1809,14 @@ vect_slp_linearize_chain (vec_info *vinfo,
 static slp_tree
 vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
-                      poly_uint64 *max_nunits,
+                      slp_tree_nunits *nunits,
                       bool *matches, unsigned *limit, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map);
 
 static slp_tree
 vect_build_slp_tree (vec_info *vinfo,
                     vec<stmt_vec_info> stmts, unsigned int group_size,
-                    poly_uint64 *max_nunits,
+                    slp_tree_nunits *nunits,
                     bool *matches, unsigned *limit, unsigned *tree_size,
                     scalar_stmts_to_slp_tree_map_t *bst_map)
 {
@@ -1829,7 +1829,7 @@ vect_build_slp_tree (vec_info *vinfo,
       if (!(*leader)->failed)
        {
          SLP_TREE_REF_COUNT (*leader)++;
-         vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
+         vect_update_nunits (nunits, (*leader)->nunits);
          stmts.release ();
          return *leader;
        }
@@ -1863,9 +1863,9 @@ vect_build_slp_tree (vec_info *vinfo,
     dump_printf_loc (MSG_NOTE, vect_location,
                     "starting SLP discovery for node %p\n", (void *) res);
 
-  poly_uint64 this_max_nunits = 1;
+  slp_tree_nunits this_nunits{};
   slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size,
-                                       &this_max_nunits,
+                                       &this_nunits,
                                        matches, limit, tree_size, bst_map);
   if (!res_)
     {
@@ -1894,8 +1894,8 @@ vect_build_slp_tree (vec_info *vinfo,
                         "SLP discovery for node %p succeeded\n",
                         (void *) res);
       gcc_assert (res_ == res);
-      res->max_nunits = this_max_nunits;
-      vect_update_max_nunits (max_nunits, this_max_nunits);
+      res->nunits = this_nunits;
+      vect_update_nunits (nunits, this_nunits);
       /* Keep a reference for the bst_map use.  */
       SLP_TREE_REF_COUNT (res)++;
     }
@@ -1953,12 +1953,12 @@ vect_slp_build_two_operator_nodes (slp_tree perm, tree vectype,
 static slp_tree
 vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
-                      poly_uint64 *max_nunits,
+                      slp_tree_nunits *nunits,
                       bool *matches, unsigned *limit, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   unsigned nops, i, this_tree_size = 0;
-  poly_uint64 this_max_nunits = *max_nunits;
+  slp_tree_nunits this_nunits = *nunits;
 
   matches[0] = false;
 
@@ -1984,8 +1984,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
        tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
        tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
                                                    group_size);
-       if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
-                                    max_nunits))
+       if (!vect_record_nunits (vinfo, stmt_info, group_size, vectype, nunits))
          return NULL;
 
        vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
@@ -2038,7 +2037,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
   tree vectype = NULL_TREE;
   if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
-                             &this_max_nunits, matches, &two_operators,
+                             &this_nunits, matches, &two_operators,
                              &vectype))
     return NULL;
 
@@ -2050,7 +2049,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
        gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
       else
        {
-         *max_nunits = this_max_nunits;
+         *nunits = this_nunits;
          (*tree_size)++;
          node = vect_create_new_slp_node (node, stmts, 0);
          SLP_TREE_VECTYPE (node) = vectype;
@@ -2132,7 +2131,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                  bool *matches2 = XALLOCAVEC (bool, dr_group_size);
                  slp_tree unperm_load
                    = vect_build_slp_tree (vinfo, stmts2, dr_group_size,
-                                          &this_max_nunits, matches2, limit,
+                                          &this_nunits, matches2, limit,
                                           &this_tree_size, bst_map);
                  /* When we are able to do the full masked load emit that
                     followed by 'node' being the desired final permutation.  */
@@ -2435,7 +2434,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                        else
                          op_stmts.quick_push (NULL);
                      child = vect_build_slp_tree (vinfo, op_stmts,
-                                                  group_size, &this_max_nunits,
+                                                  group_size, &this_nunits,
                                                   matches, limit,
                                                   &this_tree_size, bst_map);
                      /* ???  We're likely getting too many fatal mismatches
@@ -2591,7 +2590,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
              children[i] = child;
            }
          *tree_size += this_tree_size + 1;
-         *max_nunits = this_max_nunits;
+         *nunits = this_nunits;
          while (!chains.is_empty ())
            chains.pop ().release ();
          return node;
@@ -2870,7 +2869,7 @@ out:
          def_stmts2.create (1);
          def_stmts2.quick_push (oprnd_info->def_stmts[0]);
          child = vect_build_slp_tree (vinfo, def_stmts2, 1,
-                                      &this_max_nunits,
+                                      &this_nunits,
                                       matches, limit,
                                       &this_tree_size, bst_map);
          if (child)
@@ -2888,7 +2887,7 @@ out:
                    .quick_push (std::make_pair (0u, 0u));
                }
              SLP_TREE_CHILDREN (pnode).quick_push (child);
-             pnode->max_nunits = child->max_nunits;
+             pnode->nunits = child->nunits;
              children.safe_push (pnode);
              oprnd_info->def_stmts = vNULL;
              continue;
@@ -2898,7 +2897,7 @@ out:
        }
 
       if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
-                                       group_size, &this_max_nunits,
+                                       group_size, &this_nunits,
                                        matches, limit,
                                        &this_tree_size, bst_map)) != NULL)
        {
@@ -2987,7 +2986,7 @@ out:
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
-                                           group_size, &this_max_nunits,
+                                           group_size, &this_nunits,
                                            tem, limit,
                                            &this_tree_size, bst_map)) != NULL)
            {
@@ -3093,7 +3092,7 @@ fail:
     }
 
   *tree_size += this_tree_size + 1;
-  *max_nunits = this_max_nunits;
+  *nunits = this_nunits;
 
   if (two_operators)
     {
@@ -3239,16 +3238,15 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
 
   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
   dump_user_location_t user_loc = loc.get_user_location ();
-  dump_printf_loc (metadata, user_loc,
-                  "node%s %p (max_nunits=" HOST_WIDE_INT_PRINT_UNSIGNED
-                  ", refcnt=%u)",
-                  SLP_TREE_DEF_TYPE (node) == vect_external_def
-                  ? " (external)"
-                  : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
-                     ? " (constant)"
-                     : ""), (void *) node,
-                  estimated_poly_value (node->max_nunits),
-                                        SLP_TREE_REF_COUNT (node));
+  dump_printf_loc (
+    metadata, user_loc,
+    "node%s %p (nunits.min=" HOST_WIDE_INT_PRINT_UNSIGNED
+    ", nunits.max=" HOST_WIDE_INT_PRINT_UNSIGNED ", refcnt=%u)",
+    SLP_TREE_DEF_TYPE (node) == vect_external_def
+      ? " (external)"
+      : (SLP_TREE_DEF_TYPE (node) == vect_constant_def ? " (constant)" : ""),
+    (void *) node, estimated_poly_value (node->nunits.min),
+    estimated_poly_value (node->nunits.max), SLP_TREE_REF_COUNT (node));
   if (SLP_TREE_VECTYPE (node))
     dump_printf (metadata, " %T", SLP_TREE_VECTYPE (node));
   dump_printf (metadata, "%s",
@@ -3615,9 +3613,9 @@ vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
    statements and a vector of NUNITS elements.  */
 
 static poly_uint64
-calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
+calculate_unrolling_factor (slp_tree_nunits nunits, unsigned int group_size)
 {
-  return exact_div (common_multiple (nunits, group_size), group_size);
+  return exact_div (common_multiple (nunits.max, group_size), group_size);
 }
 
 /* Helper that checks to see if a node is a load node.  */
@@ -3679,9 +3677,9 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
                         (void *) root);
 
       bool *matches = XALLOCAVEC (bool, group_size);
-      poly_uint64 max_nunits = 1;
+      slp_tree_nunits nunits{};
       unsigned tree_size = 0, limit = 1;
-      node = vect_build_slp_tree (vinfo, stmts, group_size, &max_nunits,
+      node = vect_build_slp_tree (vinfo, stmts, group_size, &nunits,
                                  matches, &limit, &tree_size, bst_map);
       if (!node)
        stmts.release ();
@@ -3864,14 +3862,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
 static slp_tree
 vect_build_slp_store_interleaving (vec<slp_tree> &rhs_nodes,
                                   vec<stmt_vec_info> &scalar_stmts,
-                                  poly_uint64 max_nunits)
+                                  slp_tree_nunits nunits)
 {
   unsigned int group_size = scalar_stmts.length ();
   slp_tree node = vect_create_new_slp_node (scalar_stmts,
                                            SLP_TREE_CHILDREN
                                              (rhs_nodes[0]).length ());
   SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (rhs_nodes[0]);
-  node->max_nunits = max_nunits;
+  node->nunits = nunits;
   for (unsigned l = 0;
        l < SLP_TREE_CHILDREN (rhs_nodes[0]).length (); ++l)
     {
@@ -3881,7 +3879,7 @@ vect_build_slp_store_interleaving (vec<slp_tree> &rhs_nodes,
       SLP_TREE_CHILDREN (node).quick_push (perm);
       SLP_TREE_LANE_PERMUTATION (perm).create (group_size);
       SLP_TREE_VECTYPE (perm) = SLP_TREE_VECTYPE (node);
-      perm->max_nunits = max_nunits;
+      perm->nunits = nunits;
       SLP_TREE_LANES (perm) = group_size;
       /* ???  We should set this NULL but that's not expected.  */
       SLP_TREE_REPRESENTATIVE (perm)
@@ -3937,7 +3935,7 @@ vect_build_slp_store_interleaving (vec<slp_tree> &rhs_nodes,
              SLP_TREE_LANES (permab) = n;
              SLP_TREE_LANE_PERMUTATION (permab).create (n);
              SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm);
-             permab->max_nunits = max_nunits;
+             permab->nunits = nunits;
              /* ???  Should be NULL but that's not expected.  */
              SLP_TREE_REPRESENTATIVE (permab) = SLP_TREE_REPRESENTATIVE (perm);
              SLP_TREE_CHILDREN (permab).quick_push (a);
@@ -4008,7 +4006,7 @@ vect_build_slp_store_interleaving (vec<slp_tree> &rhs_nodes,
          SLP_TREE_LANES (permab) = n;
          SLP_TREE_LANE_PERMUTATION (permab).create (n);
          SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm);
-         permab->max_nunits = max_nunits;
+         permab->nunits = nunits;
          /* ???  Should be NULL but that's not expected.  */
          SLP_TREE_REPRESENTATIVE (permab) = SLP_TREE_REPRESENTATIVE (perm);
          SLP_TREE_CHILDREN (permab).quick_push (a);
@@ -4088,7 +4086,7 @@ vect_build_slp_instance (vec_info *vinfo,
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
-  poly_uint64 max_nunits = 1;
+  slp_tree_nunits nunits{};
   unsigned tree_size = 0;
 
   slp_tree node = NULL;
@@ -4099,19 +4097,19 @@ vect_build_slp_instance (vec_info *vinfo,
     }
   else
     node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
-                               &max_nunits, matches, limit,
+                               &nunits, matches, limit,
                                &tree_size, bst_map);
   if (node != NULL)
     {
       /* Calculate the unrolling factor based on the smallest type.  */
       poly_uint64 unrolling_factor
-       = calculate_unrolling_factor (max_nunits, group_size);
+       = calculate_unrolling_factor (nunits, group_size);
 
       if (maybe_ne (unrolling_factor, 1U)
          && is_a <bb_vec_info> (vinfo))
        {
          unsigned HOST_WIDE_INT const_max_nunits;
-         if (!max_nunits.is_constant (&const_max_nunits)
+         if (!nunits.max.is_constant (&const_max_nunits)
              || const_max_nunits > group_size)
            {
              if (dump_enabled_p ())
@@ -4345,7 +4343,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
 
       unsigned int group_size = scalar_stmts.length ();
       bool *matches = XALLOCAVEC (bool, group_size);
-      poly_uint64 max_nunits = 1;
+      slp_tree_nunits max_nunits{};
       unsigned tree_size = 0;
       slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
                                           &max_nunits, matches, limit,
@@ -4488,7 +4486,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
-  poly_uint64 max_nunits = 1;
+  slp_tree_nunits nunits{};
   unsigned tree_size = 0;
 
   /* ???  We need this only for SLP discovery.  */
@@ -4496,7 +4494,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
     REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = scalar_stmts[0];
 
   slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
-                                      &max_nunits, matches, limit,
+                                      &nunits, matches, limit,
                                       &tree_size, bst_map);
 
   for (unsigned i = 0; i < scalar_stmts.length (); ++i)
@@ -4638,11 +4636,11 @@ vect_analyze_slp_reduction (loop_vec_info vinfo,
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
-  poly_uint64 max_nunits = 1;
+  slp_tree_nunits nunits{};
   unsigned tree_size = 0;
 
   slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
-                                      &max_nunits, matches, limit,
+                                      &nunits, matches, limit,
                                       &tree_size, bst_map);
   if (node != NULL)
     {
@@ -4741,7 +4739,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
-  poly_uint64 max_nunits = 1;
+  slp_tree_nunits nunits{};
   unsigned tree_size = 0;
   unsigned i;
 
@@ -4753,26 +4751,40 @@ vect_analyze_slp_instance (vec_info *vinfo,
     }
   else
     node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
-                               &max_nunits, matches, limit,
+                               &nunits, matches, limit,
                                &tree_size, bst_map);
   if (node != NULL)
     {
       /* Calculate the unrolling factor based on the smallest type.  */
       poly_uint64 unrolling_factor
-       = calculate_unrolling_factor (max_nunits, group_size);
+       = calculate_unrolling_factor (nunits, group_size);
 
       if (maybe_ne (unrolling_factor, 1U)
-         && is_a <bb_vec_info> (vinfo))
+         && is_a<bb_vec_info> (vinfo)
+         && !known_ge (nunits.min, group_size))
        {
          unsigned HOST_WIDE_INT const_max_nunits;
-         if (!max_nunits.is_constant (&const_max_nunits)
+         if (!nunits.max.is_constant (&const_max_nunits)
              || const_max_nunits > group_size)
            {
              if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "Build SLP failed: store group "
-                                "size not a multiple of the vector size "
-                                "in basic block SLP\n");
+               {
+                 if (nunits.max.is_constant (&const_max_nunits))
+                   dump_printf_loc (
+                     MSG_MISSED_OPTIMIZATION, vect_location,
+                     "Build SLP failed: store group "
+                     "size %u not a multiple of the vector size "
+                     "%wu in basic block SLP\n",
+                     group_size, const_max_nunits);
+                 else
+                   dump_printf_loc (
+                     MSG_MISSED_OPTIMIZATION, vect_location,
+                     "Build SLP failed: store group "
+                     "size %u not a multiple of the vector size "
+                     "in basic block SLP\n",
+                     group_size);
+               }
+
              vect_free_slp_tree (node);
              return false;
            }
@@ -4929,7 +4941,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
          /* Analyze the stored values and pinch them together with
             a permute node so we can preserve the whole store group.  */
          auto_vec<slp_tree> rhs_nodes;
-         poly_uint64 max_nunits = 1;
+         slp_tree_nunits nunits{};
 
          unsigned int rhs_common_nlanes = 0;
          unsigned int start = 0, end = i;
@@ -4940,14 +4952,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
              substmts.create (end - start);
              for (unsigned j = start; j < end; ++j)
                substmts.quick_push (scalar_stmts[j]);
-             max_nunits = 1;
+             nunits = {UINT64_MAX, 1};
              node = vect_build_slp_tree (vinfo, substmts, end - start,
-                                         &max_nunits,
+                                         &nunits,
                                          matches, limit, &tree_size, bst_map);
              if (node)
                {
                  rhs_nodes.safe_push (node);
-                 vect_update_max_nunits (&max_nunits, node->max_nunits);
+                 vect_update_nunits (&nunits, node->nunits);
                  if (start == 0)
                    rhs_common_nlanes = SLP_TREE_LANES (node);
                  else if (rhs_common_nlanes != SLP_TREE_LANES (node))
@@ -5011,7 +5023,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
                                               SLP_TREE_CHILDREN
                                                 (rhs_nodes[0]).length ());
              SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (rhs_nodes[0]);
-             node->max_nunits = max_nunits;
+             node->nunits = nunits;
              node->ldst_lanes = true;
              SLP_TREE_CHILDREN (node)
                .reserve_exact (SLP_TREE_CHILDREN (rhs_nodes[0]).length ()
@@ -5029,7 +5041,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
            }
          else
            node = vect_build_slp_store_interleaving (rhs_nodes, scalar_stmts,
-                                                     max_nunits);
+                                                     nunits);
 
          while (!rhs_nodes.is_empty ())
            vect_free_slp_tree (rhs_nodes.pop ());
@@ -5294,13 +5306,13 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
        }
       for (unsigned i = 0; i < DR_GROUP_GAP (first); ++i)
        stmts.quick_push (NULL);
-      poly_uint64 max_nunits = 1;
+      slp_tree_nunits nunits{};
       bool *matches = XALLOCAVEC (bool, group_lanes);
       unsigned limit = 1;
       unsigned tree_size = 0;
       slp_tree l0 = vect_build_slp_tree (loop_vinfo, stmts,
                                         group_lanes,
-                                        &max_nunits, matches, &limit,
+                                        &nunits, matches, &limit,
                                         &tree_size, bst_map);
       gcc_assert (!SLP_TREE_LOAD_PERMUTATION (l0).exists ());
 
@@ -8151,7 +8163,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64 &vf,
 
   /* We do not visit SLP nodes for constants or externals - those neither
      have a vector type set yet (vectorizable_* does this) nor do they
-     have max_nunits set.  Instead we rely on internal nodes max_nunit
+     have nunits set.  Instead we rely on internal nodes' nunits
      to cover constant/external operands.
      Note that when we stop using fixed size vectors externs and constants
      shouldn't influence the (minimum) vectorization factor, instead
@@ -8159,7 +8171,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64 &vf,
      assign vector types to constants and externals and cause iteration
      to a higher vectorization factor when required.  */
   poly_uint64 node_vf
-    = calculate_unrolling_factor (node->max_nunits, SLP_TREE_LANES (node));
+    = calculate_unrolling_factor (node->nunits, SLP_TREE_LANES (node));
   vf = force_common_multiple (vf, node_vf);
 
   /* For permute nodes that are fed from externs or constants we have to
@@ -8169,7 +8181,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64 
&vf,
       if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
        {
          poly_uint64 child_vf
-           = calculate_unrolling_factor (node->max_nunits,
+           = calculate_unrolling_factor (node->nunits,
                                          SLP_TREE_LANES (child));
          vf = force_common_multiple (vf, child_vf);
        }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 359c994139b..ecfdb7d88ef 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -254,6 +254,18 @@ typedef auto_vec<std::pair<unsigned, unsigned>, 16> auto_lane_permutation_t;
 typedef vec<unsigned> load_permutation_t;
 typedef auto_vec<unsigned, 16> auto_load_permutation_t;
 
+struct slp_tree_nunits
+{
+  slp_tree_nunits () = default;
+
+  /* The minimum number of vector elements for the subtree rooted
+     at this node.  */
+  poly_uint64 min = UINT64_MAX;
+  /* The maximum number of vector elements for the subtree rooted
+     at this node.  */
+  poly_uint64 max = 1;
+};
+
 struct vect_data {
   virtual ~vect_data () = default;
 };
@@ -336,9 +348,9 @@ struct _slp_tree {
 
   /* Reference count in the SLP graph.  */
   unsigned int refcnt;
-  /* The maximum number of vector elements for the subtree rooted
+  /* The minimum and maximum number of vector elements for the subtree rooted
      at this node.  */
-  poly_uint64 max_nunits;
+  slp_tree_nunits nunits;
   /* The DEF type of this node.  */
   enum vect_def_type def_type;
   /* The number of scalar lanes produced by this node.  */
@@ -2326,6 +2338,37 @@ vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
   vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
 }
 
+/* Update minimum and maximum unit count *NUNITS so that it accounts for
+   NEW_NUNITS.  *NUNITS can be {MAX,1} if we haven't yet recorded anything.
+   If NEW_NUNITS is {MAX,1} then this function has no effect.  */
+
+inline void
+vect_update_nunits (slp_tree_nunits *nunits, slp_tree_nunits new_nunits)
+{
+  vect_update_max_nunits (&nunits->max, new_nunits.max);
+
+  /* We also want to know whether each individual choice of vector type
+     requires no "unrolling", which requires the minimum number of units.
+     All unit counts have the form vec_info::vector_size * X for some
+     rational X, therefore we know the values are ordered.  */
+  if (!known_eq (new_nunits.min, UINT64_MAX))
+    nunits->min = known_eq (nunits->min, UINT64_MAX)
+                   ? new_nunits.min
+                   : ordered_min (nunits->min, new_nunits.min);
+}
+
+/* Update minimum and maximum unit count *NUNITS so that it accounts for
+   the number of units in vector type VECTYPE.  *NUNITS can be {MAX,1}
+   if we haven't yet recorded any vector types.  */
+
+inline void
+vect_update_nunits (slp_tree_nunits *nunits, tree vectype)
+{
+  slp_tree_nunits new_nunits
+    = {TYPE_VECTOR_SUBPARTS (vectype), TYPE_VECTOR_SUBPARTS (vectype)};
+  vect_update_nunits (nunits, new_nunits);
+}
+
 /* Return the vectorization factor that should be used for costing
    purposes while vectorizing the loop described by LOOP_VINFO.
    Pick a reasonable estimate if the vectorization factor isn't
-- 
2.43.0


Reply via email to