This reinstates the previously removed CSE of SLP IVs, fixing the
FAIL of gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c.
It turns out the previous approach still works.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-11-04  Richard Biener  <rguent...@suse.de>

        * tree-vect-loop.c (vectorizable_induction): Reinstate
        previously removed CSE of SLP IVs.
---
 gcc/tree-vect-loop.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 41e2e2ade20..c09aa392419 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7874,8 +7874,16 @@ vectorizable_induction (loop_vec_info loop_vinfo,
       if (nested_in_vect_loop)
        nivs = nvects;
       else
-       nivs = least_common_multiple (group_size,
-                                     const_nunits) / const_nunits;
+       {
+         /* Compute the number of distinct IVs we need.  First reduce
+            group_size if it is a multiple of const_nunits so we get
+            one IV for a group_size of 4 but const_nunits 2.  */
+         unsigned group_sizep = group_size;
+         if (group_sizep % const_nunits == 0)
+           group_sizep = group_sizep / const_nunits;
+         nivs = least_common_multiple (group_sizep,
+                                       const_nunits) / const_nunits;
+       }
       tree stept = TREE_TYPE (step_vectype);
       tree lupdate_mul = NULL_TREE;
       if (!nested_in_vect_loop)
@@ -7975,6 +7983,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 
          SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
        }
+      if (!nested_in_vect_loop)
+       {
+         /* Fill up to the number of vectors we need for the whole group.  */
+         nivs = least_common_multiple (group_size,
+                                       const_nunits) / const_nunits;
+         for (; ivn < nivs; ++ivn)
+           SLP_TREE_VEC_STMTS (slp_node)
+             .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
+       }
 
       /* Re-use IVs when we can.  We are generating further vector
         stmts by adding VF' * stride to the IVs generated above.  */
-- 
2.26.2

Reply via email to