The following adds single-lane SLP support for vectorizing non-linear
inductions.

This fixes a bunch of i386-specific testcases with --param vect-force-slp=1.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/117172
        * tree-vect-loop.cc (vectorizable_nonlinear_induction): Add
        single-lane SLP support.
---
 gcc/tree-vect-loop.cc | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d1f1edc704c..50a1531f4c3 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10006,10 +10006,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
 
   gcc_assert (induction_type > vect_step_op_add);
 
-  if (slp_node)
-    ncopies = 1;
-  else
-    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+  ncopies = vect_get_num_copies (loop_vinfo, slp_node, vectype);
   gcc_assert (ncopies >= 1);
 
   /* FORNOW. Only handle nonlinear induction in the same loop.  */
@@ -10024,9 +10021,10 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
   iv_loop = loop;
   gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
 
-  /* TODO: Support slp for nonlinear iv. There should be separate vector iv
-     update for each iv and a permutation to generate wanted vector iv.  */
-  if (slp_node)
+  /* TODO: Support multi-lane SLP for nonlinear iv. There should be separate
+     vector iv update for each iv and a permutation to generate wanted
+     vector iv.  */
+  if (slp_node && SLP_TREE_LANES (slp_node) > 1)
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10237,8 +10235,13 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
               UNKNOWN_LOCATION);
 
-  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
-  *vec_stmt = induction_phi;
+  if (slp_node)
+    slp_node->push_vec_def (induction_phi);
+  else
+    {
+      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
+      *vec_stmt = induction_phi;
+    }
 
   /* In case that vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -10268,7 +10271,10 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
                                              induction_type);
          gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
          new_stmt = SSA_NAME_DEF_STMT (vec_def);
-         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+         if (slp_node)
+           slp_node->push_vec_def (new_stmt);
+         else
+           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
     }
 
-- 
2.43.0

Reply via email to