The following fixes PR69720, where nested reductions that require
unrolling the inner loop (and thus have multiple PHIs) cause us to
build the reduction epilogue incorrectly.  Existing testcases in the
testsuite are also affected, but for them it doesn't matter because
adding zero can safely be omitted ...

Bootstrapped on x86_64-unknown-linux-gnu, testing still in progress.

We might be able to remove the adjustment_def case completely now,
but that's not appropriate at this stage, so I removed only the case
we were handling incorrectly.

Richard.

2016-02-26  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/69720
        * tree-vect-loop.c (get_initial_def_for_reduction): Avoid
        the adjustment_def path for possibly vectorized defs.
        (vect_create_epilog_for_reduction): Handle vectorized initial
        defs properly.

        * gcc.dg/vect/vect-outer-pr69720.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c        (revision 233734)
--- gcc/tree-vect-loop.c        (working copy)
*************** get_initial_def_for_reduction (gimple *s
*** 4110,4115 ****
--- 4119,4133 ----
        return vect_create_destination_var (init_val, vectype);
      }
  
+   /* In case of a nested reduction do not use an adjustment def as
+      that case is not supported by the epilogue generation correctly
+      if ncopies is not one.  */
+   if (adjustment_def && nested_in_vect_loop)
+     {
+       *adjustment_def = NULL;
+       return vect_get_vec_def_for_operand (init_val, stmt);
+     }
+ 
    switch (code)
      {
        case WIDEN_SUM_EXPR:
*************** get_initial_def_for_reduction (gimple *s
*** 4124,4135 ****
          /* ADJUSMENT_DEF is NULL when called from
             vect_create_epilog_for_reduction to vectorize double reduction.  */
          if (adjustment_def)
!           {
!             if (nested_in_vect_loop)
!               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt);
!             else
!               *adjustment_def = init_val;
!           }
  
          if (code == MULT_EXPR)
            {
--- 4142,4148 ----
          /* ADJUSMENT_DEF is NULL when called from
             vect_create_epilog_for_reduction to vectorize double reduction.  */
          if (adjustment_def)
!         *adjustment_def = init_val;
  
          if (code == MULT_EXPR)
            {
*************** vect_create_epilog_for_reduction (vec<tr
*** 4341,4346 ****
--- 4354,4360 ----
       (in case of SLP, do it for all the phis). */
  
    /* Get the loop-entry arguments.  */
+   enum vect_def_type initial_def_dt = vect_unknown_def_type;
    if (slp_node)
      vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
                         NULL, slp_node, reduc_index);
*************** vect_create_epilog_for_reduction (vec<tr
*** 4351,4359 ****
        gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
        initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                           loop_preheader_edge (loop));
!       vec_initial_defs.create (1);
        vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
                                                       &adjustment_def);
        vec_initial_defs.quick_push (vec_initial_def);
      }
  
--- 4365,4374 ----
        gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
        initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                           loop_preheader_edge (loop));
!       vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
        vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
                                                       &adjustment_def);
+       vec_initial_defs.create (1);
        vec_initial_defs.quick_push (vec_initial_def);
      }
  
*************** vect_create_epilog_for_reduction (vec<tr
*** 4368,4373 ****
--- 4383,4397 ----
        def = vect_defs[i];
        for (j = 0; j < ncopies; j++)
          {
+         if (j != 0)
+           {
+             phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+             if (nested_in_vect_loop)
+               vec_init_def
+                 = vect_get_vec_def_for_stmt_copy (initial_def_dt,
+                                                   vec_init_def);
+           }
+ 
          /* Set the loop-entry arg of the reduction-phi.  */
  
          if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
*************** vect_create_epilog_for_reduction (vec<tr
*** 4404,4411 ****
                dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
                dump_printf (MSG_NOTE, "\n");
              }
- 
-           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
          }
      }
  
--- 4428,4433 ----
Index: gcc/testsuite/gcc.dg/vect/vect-outer-pr69720.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-outer-pr69720.c      (revision 0)
--- gcc/testsuite/gcc.dg/vect/vect-outer-pr69720.c      (working copy)
***************
*** 0 ****
--- 1,28 ----
+ extern void abort (void);
+ 
+ int a[128];
+ double b[128] = { 1., 2., 3., 4. };
+ 
+ void __attribute__((noinline)) foo()
+ {
+   int i;
+   for (i = 0; i < 128; ++i)
+     {
+       double tem1 = b[i];
+       for (int j = 0; j < 32; ++j)
+       tem1 += 1;
+       b[i] = tem1;
+       a[i] = i;
+     }
+ }
+ 
+ int main()
+ {
+   foo ();
+   if (b[0] != 33. || b[1] != 34.
+       || b[2] != 35. || b[3] != 36.)
+     abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target { vect_double && vect_int } } } } */

Reply via email to