The fold-left reduction transform relies on preserving the scalar
cycle PHI.  The following rewrites how we connect this to the
involved stmt-infos instead of relying on (the actually bogus for
reduction chains) scalar stmts in SLP nodes more than absolutely
necessary.  This also makes sure to not re-associate to form a
reduction chain when a fold-left reduction is required.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/122371
        * tree-vect-loop.cc (vectorize_fold_left_reduction): Get
        to the scalar def to replace via the scalar PHI backedge def.
        * tree-vect-slp.cc (vect_analyze_slp_reduc_chain): Do not
        re-associate to form a reduction chain if a fold-left
        reduction is required.

        * gcc.dg/vect/vect-pr122371.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/vect-pr122371.c | 20 ++++++++++++++++++++
 gcc/tree-vect-loop.cc                     | 17 ++++++++++-------
 gcc/tree-vect-slp.cc                      |  6 +++++-
 3 files changed, 35 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr122371.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122371.c b/gcc/testsuite/gcc.dg/vect/vect-pr122371.c
new file mode 100644
index 00000000000..fd03b846a9e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr122371.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+struct {
+  double lsum;
+} AnalyzeSamples_rgData;
+
+float *AnalyzeSamples_curleft;
+float AnalyzeSamples_sum_l;
+int AnalyzeSamples_i;
+
+void AnalyzeSamples() {
+  while (AnalyzeSamples_i--) {
+    float l1 = AnalyzeSamples_curleft[1] * AnalyzeSamples_curleft[1],
+          l3 = AnalyzeSamples_curleft[3] * AnalyzeSamples_curleft[3],
+          sl = l1 + l3;
+    AnalyzeSamples_sum_l += sl;
+    AnalyzeSamples_curleft += 4;
+  }
+  AnalyzeSamples_rgData.lsum += AnalyzeSamples_sum_l;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 15cb22023fc..617018f5aaf 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6405,27 +6405,30 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
      would also allow generalizing this for reduction paths of length > 1
      and/or SLP reductions.  */
   slp_tree reduc_node = SLP_TREE_CHILDREN (slp_node)[reduc_index];
-  tree reduc_var = vect_get_slp_scalar_def (reduc_node, 0);
+  stmt_vec_info reduc_var_def = SLP_TREE_SCALAR_STMTS (reduc_node)[0];
+  tree reduc_var = gimple_get_lhs (STMT_VINFO_STMT (reduc_var_def));
 
   /* The operands either come from a binary operation or an IFN_COND operation.
      The former is a gimple assign with binary rhs and the latter is a
      gimple call with four arguments.  */
   gcc_assert (num_ops == 2 || num_ops == 4);
 
-  int group_size = 1;
-  stmt_vec_info scalar_dest_def_info;
   auto_vec<tree> vec_oprnds0, vec_opmask;
   vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0)
                                                  + (1 - reduc_index)],
                                                  &vec_oprnds0);
-  group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
-  scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
   /* For an IFN_COND_OP we also need the vector mask operand.  */
   if (is_cond_op)
     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask);
 
-  gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
-  tree scalar_dest = gimple_get_lhs (sdef);
+  /* The transform below relies on preserving the original scalar PHI
+     and its latch def which we replace.  So work backwards from there.  */
+  tree scalar_dest
+    = gimple_phi_arg_def_from_edge (as_a <gphi *> (STMT_VINFO_STMT
+                                                    (reduc_var_def)),
+                                   loop_latch_edge (loop));
+  stmt_vec_info scalar_dest_def_info
+    = vect_stmt_to_vectorize (loop_vinfo->lookup_def (scalar_dest));
   tree scalar_type = TREE_TYPE (scalar_dest);
 
   int vec_num = vec_oprnds0.length ();
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9d8e64b2712..9698709f567 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4237,7 +4237,11 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
      reduction chain try to linearize an associative operation manually.  */
   if (scalar_stmts.length () == 1
       && code.is_tree_code ()
-      && associative_tree_code ((tree_code)code))
+      && associative_tree_code ((tree_code)code)
+      /* We may not associate if a fold-left reduction is required.  */
+      && !needs_fold_left_reduction_p (TREE_TYPE (gimple_get_lhs
+                                                   (scalar_stmt->stmt)),
+                                      code))
     {
       auto_vec<chain_op_t> chain;
       auto_vec<std::pair<tree_code, gimple *> > worklist;
-- 
2.51.0

Reply via email to