https://gcc.gnu.org/g:accb85345edb91368221fd07b74e74df427b7de0

commit r15-4324-gaccb85345edb91368221fd07b74e74df427b7de0
Author: Tamar Christina <tamar.christ...@arm.com>
Date:   Mon Oct 14 11:58:59 2024 +0100

    middle-end: support SLP early break
    
    This patch introduces feature parity for early break in the SLP-only
    vectorizer.
    
    The approach taken here is to treat the early exits as root statements for
    an SLP tree.  This means that we don't need any changes to build_slp to
    support gconds.
    
    Codegen for the gcond itself now has to be done out of line, but the body
    of the SLP blocks itself is simply driven by SLP scheduling.  There is a
    slight awkwardness in having re-used vectorizable_early_exit for both SLP
    and non-SLP, but I've documented the differences, and when I did try to
    refactor it, it wasn't really worth it given that this is a temporary
    state anyway.
    
    This version is restricted to lane = 1; as such we can re-use the existing
    move_early_break function instead of having to do safety update through
    scheduling.  I have a branch where I'm working on that, but lane > 1 is
    out of scope for GCC 15 anyway.  The only reason I will try to get moving
    through scheduling done as a stretch goal is so we get epilogue
    vectorization back for early break.
    
    The example:
    
    unsigned test4(unsigned x)
    {
     unsigned ret = 0;
     for (int i = 0; i < N; i++)
     {
       vect_b[i] = x + i;
       if (vect_a[i]*2 != x)
         break;
       vect_a[i] = x;
    
     }
     return ret;
    }
    
    builds the following SLP instance for early break:
    
    note:   Analyzing vectorizable control flow: if (patt_6 != 0)
    note:   Starting SLP discovery for
    note:     patt_6 = _4 != x_9(D);
    note:   starting SLP discovery for node 0x63abc80
    note:   Build SLP for patt_6 = _4 != x_9(D);
    note:   precomputed vectype: vector(4) <signed-boolean:32>
    note:   nunits = 4
    note:   vect_is_simple_use: operand x_9(D), type of def: external
    note:   vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 0][2, +INF] MASK 0xffff
            _3 * 2, type of def: internal
    note:   starting SLP discovery for node 0x63abdc0
    note:   Build SLP for _4 = _3 * 2;
    note:   precomputed vectype: vector(4) unsigned int
    note:   nunits = 4
    note:   vect_is_simple_use: operand #
            vect_aD.4416[i_15], type of def: internal
    note:   vect_is_simple_use: operand 2, type of def: constant
    note:   starting SLP discovery for node 0x63abe60
    note:   Build SLP for _3 = vect_a[i_15];
    note:   precomputed vectype: vector(4) unsigned int
    note:   nunits = 4
    note:   SLP discovery for node 0x63abe60 succeeded
    note:   SLP discovery for node 0x63abdc0 succeeded
    note:   SLP discovery for node 0x63abc80 succeeded
    note:   SLP size 3 vs. limit 10.
    note:   Final SLP tree for instance 0x6474190:
    note:   node 0x63abc80 (max_nunits=4, refcnt=2) vector(4) <signed-boolean:32>
    note:   op template: patt_6 = _4 != x_9(D);
    note:           stmt 0 patt_6 = _4 != x_9(D);
    note:           children 0x63abd20 0x63abdc0
    note:   node (external) 0x63abd20 (max_nunits=1, refcnt=1)
    note:           { x_9(D) }
    note:   node 0x63abdc0 (max_nunits=4, refcnt=2) vector(4) unsigned int
    note:   op template: _4 = _3 * 2;
    note:           stmt 0 _4 = _3 * 2;
    note:           children 0x63abe60 0x63abf00
    note:   node 0x63abe60 (max_nunits=4, refcnt=2) vector(4) unsigned int
    note:   op template: _3 = vect_a[i_15];
    note:           stmt 0 _3 = vect_a[i_15];
    note:           load permutation { 0 }
    note:   node (constant) 0x63abf00 (max_nunits=1, refcnt=1)
    note:           { 2 }
    
    and during codegen:
    
    note:   ------>vectorizing SLP node starting from: patt_6 = _4 != x_9(D);
    note:   vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 0][2, +INF] MASK 0xffff
            _3 * 2, type of def: internal
    note:   add new stmt: mask_patt_6.18_58 = _53 != vect__4.17_57;
    note:    === vectorizable_early_exit ===
    note:    transform early-exit.
    note:   vectorizing stmts using SLP.
    note:   Vectorizing SLP tree:
    note:   node 0x63abfa0 (max_nunits=4, refcnt=1) vector(4) int
    note:   op template: i_12 = i_15 + 1;
    note:           stmt 0 i_12 = i_15 + 1;
    note:           children 0x63aba00 0x63ac040
    note:   node 0x63aba00 (max_nunits=4, refcnt=2) vector(4) int
    note:   op template: i_15 = PHI <i_12(6), 0(14)>
    note:           [l] stmt 0 i_15 = PHI <i_12(6), 0(14)>
    note:           children (nil) (nil)
    note:   node (constant) 0x63ac040 (max_nunits=1, refcnt=1) vector(4) int
    note:           { 1 }
    
    gcc/ChangeLog:
    
            * tree-vect-loop.cc (vect_analyze_loop_2): Handle SLP trees with no
            children.
            * tree-vectorizer.h (enum slp_instance_kind): Add
            slp_inst_kind_gcond.
            (LOOP_VINFO_EARLY_BREAKS_LIVE_IVS): New.
            (vectorizable_early_exit): Expose.
            (class _loop_vec_info): Add early_break_live_ivs.
            * tree-vect-slp.cc (vect_build_slp_instance)
            (vect_analyze_slp_instance): Support gcond instances.
            (vect_analyze_slp): Analyze gcond roots and early break live
            statements.
            (maybe_push_to_hybrid_worklist): Don't sink gconds.
            (vect_slp_analyze_operations): Support gconds.
            (vect_slp_check_for_roots): Update comments.
            (vectorize_slp_instance_root_stmt): Support gconds.
            (vect_schedule_slp): Pass vinfo to vectorize_slp_instance_root_stmt.
            * tree-vect-stmts.cc (vect_stmt_relevant_p): Record early break live
            statements.
            (vectorizable_early_exit): Support SLP.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/vect/vect-early-break_126.c: New test.
            * gcc.dg/vect/vect-early-break_127.c: New test.
            * gcc.dg/vect/vect-early-break_128.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-early-break_126.c |  28 ++++++
 gcc/testsuite/gcc.dg/vect/vect-early-break_127.c |  27 ++++++
 gcc/testsuite/gcc.dg/vect/vect-early-break_128.c |  31 ++++++
 gcc/tree-vect-loop.cc                            |   3 +
 gcc/tree-vect-slp.cc                             | 116 ++++++++++++++++++++++-
 gcc/tree-vect-stmts.cc                           |  61 +++++++++---
 gcc/tree-vectorizer.h                            |  12 ++-
 7 files changed, 257 insertions(+), 21 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_126.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_126.c
new file mode 100644
index 000000000000..4bfc9880f9fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_126.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+
+#define N 1024
+unsigned vect_a[N];
+unsigned vect_b[N];
+ 
+unsigned test4(unsigned x)
+{
+ unsigned ret = 0;
+ for (int i = 0; i < N; i++)
+ {
+   vect_b[i] = x + i;
+   if (vect_a[i] > x)
+     {
+       ret *= vect_a[i];
+       return vect_a[i];
+     }
+   vect_a[i] = x;
+   ret += vect_a[i] + vect_b[i];
+ }
+ return ret;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_127.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_127.c
new file mode 100644
index 000000000000..67cb5d34a771
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_127.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+
+#ifndef N
+#define N 800
+#endif
+unsigned vect_a[N];
+unsigned vect_b[N];
+  
+unsigned test4(unsigned x)
+{
+ unsigned ret = 0;
+ for (int i = 0; i < N; i++)
+ {
+   vect_b[i] = x + i;
+   if (vect_a[i]*2 != x)
+     break;
+   vect_a[i] = x;
+   
+ }
+ return ret;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c
new file mode 100644
index 000000000000..6d7fb920ec2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+
+#ifndef N
+#define N 800
+#endif
+unsigned vect_a[N];
+unsigned vect_b[N];
+  
+unsigned test4(unsigned x)
+{
+ unsigned ret = 0;
+ for (int i = 0; i < N; i+=2)
+ {
+   vect_b[i] = x + i;
+   vect_b[i+1] = x + i+1;
+   if (vect_a[i]*2 != x)
+     break;
+   if (vect_a[i+1]*2 != x)
+     break;
+   vect_a[i] = x;
+   vect_a[i+1] = x;
+   
+ }
+ return ret;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 025442aabc38..d1f1edc704c3 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3256,6 +3256,9 @@ again:
   unsigned i, j;
   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
     {
+      if (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance)) != 
vect_internal_def)
+       continue;
+
       stmt_vec_info vinfo;
       vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
       if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 83cb39fc2142..16332e0b6d74 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3756,6 +3756,13 @@ vect_build_slp_instance (vec_info *vinfo,
                         "Analyzing vectorizable constructor: %G\n",
                         root_stmt_infos[0]->stmt);
     }
+  else if (kind == slp_inst_kind_gcond)
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "Analyzing vectorizable control flow: %G",
+                        root_stmt_infos[0]->stmt);
+    }
 
   if (dump_enabled_p ())
     {
@@ -4827,6 +4834,80 @@ vect_analyze_slp (vec_info *vinfo, unsigned 
max_tree_size,
                                         bst_map, NULL, force_single_lane);
              }
          }
+
+      /* Find SLP sequences starting from gconds.  */
+      for (auto cond : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
+       {
+         auto cond_info = loop_vinfo->lookup_stmt (cond);
+
+         cond_info = vect_stmt_to_vectorize (cond_info);
+         vec<stmt_vec_info> roots = vNULL;
+         roots.safe_push (cond_info);
+         gimple *stmt = STMT_VINFO_STMT (cond_info);
+         tree args0 = gimple_cond_lhs (stmt);
+         tree args1 = gimple_cond_rhs (stmt);
+
+         /* These should be enforced by cond lowering.  */
+         gcc_assert (gimple_cond_code (stmt) == NE_EXPR);
+         gcc_assert (zerop (args1));
+
+         /* An argument without a loop def will be codegened from vectorizing 
the
+            root gcond itself.  As such we don't need to try to build an SLP 
tree
+            from them.  It's highly likely that the resulting SLP tree here if 
both
+            arguments have a def will be incompatible, but we rely on it being 
split
+            later on.  */
+         if (auto varg = loop_vinfo->lookup_def (args0))
+           {
+             vec<stmt_vec_info> stmts;
+             vec<tree> remain = vNULL;
+             stmts.create (1);
+             stmts.quick_push (vect_stmt_to_vectorize (varg));
+
+             vect_build_slp_instance (vinfo, slp_inst_kind_gcond,
+                                      stmts, roots, remain,
+                                      max_tree_size, &limit,
+                                      bst_map, NULL, force_single_lane);
+           }
+         else
+           {
+             /* Create a new SLP instance.  */
+             slp_instance new_instance = XNEW (class _slp_instance);
+             vec<tree> ops;
+             ops.create (1);
+             ops.quick_push (args0);
+             slp_tree invnode = vect_create_new_slp_node (ops);
+             SLP_TREE_DEF_TYPE (invnode) = vect_external_def;
+             SLP_INSTANCE_TREE (new_instance) = invnode;
+             SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = 1;
+             SLP_INSTANCE_LOADS (new_instance) = vNULL;
+             SLP_INSTANCE_ROOT_STMTS (new_instance) = roots;
+             SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL;
+             SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_gcond;
+             new_instance->reduc_phis = NULL;
+             new_instance->cost_vec = vNULL;
+             new_instance->subgraph_entries = vNULL;
+             vinfo->slp_instances.safe_push (new_instance);
+           }
+       }
+
+       /* Find and create slp instances for inductions that have been forced
+          live due to early break.  */
+       edge latch_e = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
+       for (auto stmt_info : LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo))
+         {
+           vec<stmt_vec_info> stmts;
+           vec<stmt_vec_info> roots = vNULL;
+           vec<tree> remain = vNULL;
+           gphi *lc_phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
+           tree def = gimple_phi_arg_def_from_edge (lc_phi, latch_e);
+           stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
+           stmts.create (1);
+           stmts.quick_push (vect_stmt_to_vectorize (lc_info));
+           vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+                                    stmts, roots, remain,
+                                    max_tree_size, &limit,
+                                    bst_map, NULL, force_single_lane);
+         }
     }
 
   hash_set<slp_tree> visited_patterns;
@@ -7242,8 +7323,9 @@ maybe_push_to_hybrid_worklist (vec_info *vinfo,
            }
        }
     }
-  /* No def means this is a loo_vect sink.  */
-  if (!any_def)
+  /* No def means this is a loop_vect sink.  Gimple conditionals also don't 
have a
+     def but shouldn't be considered sinks.  */
+  if (!any_def && STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
@@ -8067,7 +8149,14 @@ vect_slp_analyze_operations (vec_info *vinfo)
                                            (SLP_INSTANCE_TREE (instance))))))
          /* Check we can vectorize the reduction.  */
          || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc
-             && !vectorizable_bb_reduc_epilogue (instance, &cost_vec)))
+             && !vectorizable_bb_reduc_epilogue (instance, &cost_vec))
+         /* Check we can vectorize the gcond.  */
+         || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_gcond
+             && !vectorizable_early_exit (vinfo,
+                                          SLP_INSTANCE_ROOT_STMTS 
(instance)[0],
+                                          NULL, NULL,
+                                          SLP_INSTANCE_TREE (instance),
+                                          &cost_vec)))
         {
          cost_vec.release ();
          slp_tree node = SLP_INSTANCE_TREE (instance);
@@ -8697,6 +8786,8 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
         !gsi_end_p (gsi); gsi_next (&gsi))
     {
       gassign *assign = dyn_cast<gassign *> (gsi_stmt (gsi));
+      /* This can be used to start SLP discovery for early breaks for BB early 
breaks
+        when we get that far.  */
       if (!assign)
        continue;
 
@@ -10924,7 +11015,7 @@ vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree 
node)
 /* Vectorize the instance root.  */
 
 void
-vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
+vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance 
instance)
 {
   gassign *rstmt = NULL;
 
@@ -11028,6 +11119,21 @@ vectorize_slp_instance_root_stmt (slp_tree node, 
slp_instance instance)
       update_stmt (gsi_stmt (rgsi));
       return;
     }
+  else if (instance->kind == slp_inst_kind_gcond)
+    {
+      /* Only support a single root for now as we can't codegen CFG yet and so 
we
+        can't support lane > 1 at this time.  */
+      gcc_assert (instance->root_stmts.length () == 1);
+      auto root_stmt_info = instance->root_stmts[0];
+      auto last_stmt = STMT_VINFO_STMT (root_stmt_info);
+      gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
+      gimple *vec_stmt = NULL;
+      gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());
+      bool res = vectorizable_early_exit (vinfo, root_stmt_info, &rgsi,
+                                         &vec_stmt, node, NULL);
+      gcc_assert (res);
+      return;
+    }
   else
     gcc_unreachable ();
 
@@ -11246,7 +11352,7 @@ vect_schedule_slp (vec_info *vinfo, const 
vec<slp_instance> &slp_instances)
        vect_schedule_scc (vinfo, node, instance, scc_info, maxdfs, stack);
 
       if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ())
-       vectorize_slp_instance_root_stmt (node, instance);
+       vectorize_slp_instance_root_stmt (vinfo, node, instance);
 
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 4f6905f15417..9b14b96cb5a6 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -411,6 +411,7 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: induction forced for "
                           "early break.\n");
+      LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info);
       *live_p = true;
 
     }
@@ -13016,7 +13017,7 @@ vectorizable_comparison (vec_info *vinfo,
 /* Check to see if the current early break given in STMT_INFO is valid for
    vectorization.  */
 
-static bool
+bool
 vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
                         gimple_stmt_iterator *gsi, gimple **vec_stmt,
                         slp_tree slp_node, stmt_vector_for_cost *cost_vec)
@@ -13040,8 +13041,13 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
   slp_tree slp_op0;
   tree op0;
   enum vect_def_type dt0;
-  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
-                          &vectype))
+
+  /* Early break gcond kind SLP trees can be root only and have no children,
+     for instance in the case where the argument is an external.  If that's
+     the case there is no operand to analyse use of.  */
+  if ((!slp_node || !SLP_TREE_CHILDREN (slp_node).is_empty ())
+      && !vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, 
&dt0,
+                             &vectype))
     {
       if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -13049,16 +13055,30 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
        return false;
     }
 
+  /* For SLP we don't want to use the type of the operands of the SLP node, 
when
+     vectorizing using SLP slp_node will be the children of the gcond and we
+     want to use the type of the direct children which since the gcond is root
+     will be the current node, rather than a child node as vect_is_simple_use
+     assumes.  */
+  if (slp_node)
+    vectype = SLP_TREE_VECTYPE (slp_node);
+
   if (!vectype)
     return false;
 
   machine_mode mode = TYPE_MODE (vectype);
-  int ncopies;
+  int ncopies, vec_num;
 
   if (slp_node)
-    ncopies = 1;
+    {
+      ncopies = 1;
+      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+    }
   else
-    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+    {
+      ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      vec_num = 1;
+    }
 
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
   vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
@@ -13127,9 +13147,11 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
        {
          if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
                                              OPTIMIZE_FOR_SPEED))
-           vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
+           vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num,
+                                 vectype, 1);
          else
-           vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
+           vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+                                  vectype, NULL);
        }
 
       return true;
@@ -13143,9 +13165,18 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
 
-  if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
-                                 vec_stmt, slp_node, cost_vec))
-    gcc_unreachable ();
+  /* For SLP we don't do codegen of the body starting from the gcond, the 
gconds are
+     roots and so by the time we get to them we have already codegened the SLP 
tree
+     and so we shouldn't try to do so again.  The arguments have already been
+     vectorized.  It's not very clean to do this here, But the masking code 
below is
+     complex and this keeps it all in one place to ease fixes and backports.  
Once we
+     drop the non-SLP loop vect or split vectorizable_* this can be 
simplified.  */
+  if (!slp_node)
+    {
+      if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
+                                     vec_stmt, slp_node, cost_vec))
+       gcc_unreachable ();
+    }
 
   gimple *stmt = STMT_VINFO_STMT (stmt_info);
   basic_block cond_bb = gimple_bb (stmt);
@@ -13177,8 +13208,8 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info 
stmt_info,
        for (unsigned i = 0; i < stmts.length (); i++)
          {
            tree stmt_mask
-             = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype,
-                                   i);
+             = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies * vec_num,
+                                   vectype, i);
            stmt_mask
              = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
                                  stmts[i], &cond_gsi);
@@ -13188,8 +13219,8 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info 
stmt_info,
        for (unsigned i = 0; i < stmts.length (); i++)
          {
            tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
-                                                   lens, ncopies, vectype,
-                                                   stmts[i], i, 1);
+                                                   lens, ncopies * vec_num,
+                                                   vectype, stmts[i], i, 1);
 
            workset.quick_push (len_mask);
          }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 2775d873ca42..11f921fbad87 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -296,7 +296,8 @@ enum slp_instance_kind {
     slp_inst_kind_reduc_group,
     slp_inst_kind_reduc_chain,
     slp_inst_kind_bb_reduc,
-    slp_inst_kind_ctor
+    slp_inst_kind_ctor,
+    slp_inst_kind_gcond
 };
 
 /* SLP instance is a sequence of stmts in a loop that can be packed into
@@ -1022,6 +1023,10 @@ public:
   /* Statements whose VUSES need updating if early break vectorization is to
      happen.  */
   auto_vec<gimple*> early_break_vuses;
+
+  /* Record statements that are needed to be live for early break vectorization
+     but may not have an LC PHI node materialized yet in the exits.  */
+  auto_vec<stmt_vec_info> early_break_live_ivs;
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -1081,6 +1086,8 @@ public:
 #define LOOP_VINFO_EARLY_BRK_STORES(L)     (L)->early_break_stores
 #define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L)  \
   (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
+#define LOOP_VINFO_EARLY_BREAKS_LIVE_IVS(L)  \
+  (L)->early_break_live_ivs
 #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
 #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
 #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
@@ -2546,6 +2553,9 @@ extern bool vectorizable_phi (vec_info *, stmt_vec_info, 
gimple **, slp_tree,
                              stmt_vector_for_cost *);
 extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
                                  gimple **, slp_tree, stmt_vector_for_cost *);
+extern bool vectorizable_early_exit (vec_info *, stmt_vec_info,
+                                    gimple_stmt_iterator *, gimple **,
+                                    slp_tree, stmt_vector_for_cost *);
 extern bool vect_emulated_vector_p (tree);
 extern bool vect_can_vectorize_without_simd_p (tree_code);
 extern bool vect_can_vectorize_without_simd_p (code_helper);

Reply via email to