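The following patch fixes double accounting of the cost of loads in
loops peeled for alignment.  It also moves the cost model dump ahead of
the profitability computation, so that the analysis is dumped even in
the cases where vectorization is rejected.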

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-01-29  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/64844
        * tree-vect-loop.c (vect_estimate_min_profitable_iters): Always
        dump cost model analysis.
        * tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
        Do not register adjusted load/store costs here.

        * gcc.dg/vect/pr64844.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c        (revision 220205)
+++ gcc/tree-vect-loop.c        (working copy)
@@ -2990,6 +2990,27 @@ vect_estimate_min_profitable_iters (loop
 
   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
   
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
+      dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
+                   vec_inside_cost);
+      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
+                   vec_prologue_cost);
+      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
+                   vec_epilogue_cost);
+      dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
+                   scalar_single_iter_cost);
+      dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
+                   scalar_outside_cost);
+      dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
+                   vec_outside_cost);
+      dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
+                   peel_iters_prologue);
+      dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
+                   peel_iters_epilogue);
+    }
+
   /* Calculate number of iterations required to make the vector version
      profitable, relative to the loop bodies only.  The following condition
      must hold true:
@@ -3037,30 +3058,9 @@ vect_estimate_min_profitable_iters (loop
       return;
     }
 
-  if (dump_enabled_p ())
-    {
-      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
-      dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
-                   vec_inside_cost);
-      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
-                   vec_prologue_cost);
-      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
-                   vec_epilogue_cost);
-      dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
-                   scalar_single_iter_cost);
-      dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
-                   scalar_outside_cost);
-      dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
-                   vec_outside_cost);
-      dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
-                   peel_iters_prologue);
-      dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
-                   peel_iters_epilogue);
-      dump_printf (MSG_NOTE,
-                   "  Calculated minimum iters for profitability: %d\n",
-                   min_profitable_iters);
-      dump_printf (MSG_NOTE, "\n");
-    }
+  dump_printf (MSG_NOTE,
+              "  Calculated minimum iters for profitability: %d\n",
+              min_profitable_iters);
 
   min_profitable_iters =
        min_profitable_iters < vf ? vf : min_profitable_iters;
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c   (revision 220205)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -1763,9 +1763,6 @@ vect_enhance_data_refs_alignment (loop_v
 
       if (do_peeling)
         {
-         stmt_info_for_cost *si;
-         void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
              If the misalignment of DR_i is identical to that of dr0 then set
              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
@@ -1791,20 +1788,10 @@ vect_enhance_data_refs_alignment (loop_v
               dump_printf_loc (MSG_NOTE, vect_location,
                                "Peeling for alignment will be applied.\n");
             }
-         /* We've delayed passing the inside-loop peeling costs to the
-            target cost model until we were sure peeling would happen.
-            Do so now.  */
-         if (body_cost_vec.exists ())
-           {
-             FOR_EACH_VEC_ELT (body_cost_vec, i, si)
-               {
-                 struct _stmt_vec_info *stmt_info
-                   = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
-                 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
-                                       si->misalign, vect_body);
-               }
-             body_cost_vec.release ();
-           }
+         /* The inside-loop cost will be accounted for in vectorizable_load
+            and vectorizable_store correctly with adjusted alignments.
+            Drop the body_cost_vec on the floor here.  */
+         body_cost_vec.release ();
 
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
Index: gcc/testsuite/gcc.dg/vect/pr64844.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr64844.c (revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr64844.c (revision 0)
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-ffast-math" } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+typedef __SIZE_TYPE__ size_t;
+
+static double
+compute(size_t n, double const * __restrict a, double const * __restrict b)
+{
+  double res = 0.0;
+  size_t i;
+  for (i = 0; i < n; ++i)
+    res += a[i] + b[i];
+  return res;
+}
+
+void init(double *, double *);
+
+int
+main()
+{
+  double ary1[1024];
+  double ary2[1024];
+  size_t i;
+
+  check_vect ();
+
+  // Initialize arrays
+  for (i = 0; i < 1024; ++i)
+    {
+      ary1[i] = 1 / (double)(i + 1);
+      ary2[i] = 1 + 1 / (double) (i + 1);
+      __asm__ volatile ("" : : : "memory");
+    }
+
+  // Compute two results using different starting elements
+  if ((int) compute (512, &ary1[0], &ary2[0]) != 525
+      || (int) compute(512, &ary1[1], &ary2[1]) != 523)
+    abort ();
+
+  return 0;
+}
+
+/* All targets should allow vectorizing this by some means of
+   dealing with the known misalignment in loop 2.  */
+
+/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

Reply via email to