The following "fixes" the bogus loop distribution that breaks
434.zeusmp by applying the cost model for -ftree-loop-distribute-patterns
as well (with -O3 -ftree-loop-distribution the bug doesn't reproduce).
This only papers over the real issue, but it is also part of the real
fix I am developing (a missing feature that I have put off for too
long).  Since it unbreaks 434.zeusmp, I'll apply this beforehand.
The real fix, while complete, still needs some TLC and compile-time work.

Bootstrap and regtest are running on x86_64-unknown-linux-gnu.  I'll
also double-check SPEC 2k6 before committing this.

Thanks,
Richard.

2013-09-20  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/58453
        * tree-loop-distribution.c (distribute_loop): Apply the cost
        model for -ftree-loop-distribute-patterns, too.

        * gcc.dg/tree-ssa/ldist-23.c: New testcase.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c        (revision 202774)
--- gcc/tree-loop-distribution.c        (working copy)
*************** distribute_loop (struct loop *loop, vec<
*** 1514,1531 ****
        any_builtin |= partition_builtin_p (partition);
      }
  
    /* If we are only distributing patterns fuse all partitions that
!      were not properly classified as builtins.  Else fuse partitions
!      with similar memory accesses.  */
    if (!flag_tree_loop_distribution)
      {
        partition_t into;
-       /* If we did not detect any builtin simply bail out.  */
-       if (!any_builtin)
-       {
-         nbp = 0;
-         goto ldist_done;
-       }
        /* Only fuse adjacent non-builtin partitions, see PR53616.
           ???  Use dependence information to improve partition ordering.  */
        i = 0;
--- 1514,1564 ----
        any_builtin |= partition_builtin_p (partition);
      }
  
+   /* If we did not detect any builtin but are not asked to apply
+      regular loop distribution simply bail out.  */
+   if (!flag_tree_loop_distribution
+       && !any_builtin)
+     {
+       nbp = 0;
+       goto ldist_done;
+     }
+ 
+   /* Apply our simple cost model - fuse partitions with similar
+      memory accesses.  */
+   partition_t into;
+   for (i = 0; partitions.iterate (i, &into); ++i)
+     {
+       if (partition_builtin_p (into))
+       continue;
+       for (int j = i + 1;
+          partitions.iterate (j, &partition); ++j)
+       {
+         if (!partition_builtin_p (partition)
+             && similar_memory_accesses (rdg, into, partition))
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               {
+                 fprintf (dump_file, "fusing partitions\n");
+                 dump_bitmap (dump_file, into->stmts);
+                 dump_bitmap (dump_file, partition->stmts);
+                 fprintf (dump_file, "because they have similar "
+                          "memory accesses\n");
+               }
+             bitmap_ior_into (into->stmts, partition->stmts);
+             if (partition->kind == PKIND_REDUCTION)
+               into->kind = PKIND_REDUCTION;
+             partitions.ordered_remove (j);
+             partition_free (partition);
+             j--;
+           }
+       }
+     }
+ 
    /* If we are only distributing patterns fuse all partitions that
!      were not properly classified as builtins.  */
    if (!flag_tree_loop_distribution)
      {
        partition_t into;
        /* Only fuse adjacent non-builtin partitions, see PR53616.
           ???  Use dependence information to improve partition ordering.  */
        i = 0;
*************** distribute_loop (struct loop *loop, vec<
*** 1549,1586 ****
        }
        while ((unsigned) i < partitions.length ());
      }
-   else
-     {
-       partition_t into;
-       int j;
-       for (i = 0; partitions.iterate (i, &into); ++i)
-       {
-         if (partition_builtin_p (into))
-           continue;
-         for (j = i + 1;
-              partitions.iterate (j, &partition); ++j)
-           {
-             if (!partition_builtin_p (partition)
-                 && similar_memory_accesses (rdg, into, partition))
-               {
-                 if (dump_file && (dump_flags & TDF_DETAILS))
-                   {
-                     fprintf (dump_file, "fusing partitions\n");
-                     dump_bitmap (dump_file, into->stmts);
-                     dump_bitmap (dump_file, partition->stmts);
-                     fprintf (dump_file, "because they have similar "
-                              "memory accesses\n");
-                   }
-                 bitmap_ior_into (into->stmts, partition->stmts);
-                 if (partition->kind == PKIND_REDUCTION)
-                   into->kind = PKIND_REDUCTION;
-                 partitions.ordered_remove (j);
-                 partition_free (partition);
-                 j--;
-               }
-           }
-       }
-     }
  
    /* Fuse all reduction partitions into the last.  */
    if (partitions.length () > 1)
--- 1582,1587 ----
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c    (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c    (working copy)
***************
*** 0 ****
--- 1,34 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+ 
+ extern void abort (void);
+ 
+ int a[128], b[128], c[128], d[128];
+ 
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+   int i;
+   for (i = 0; i < 128; ++i)
+     {
+       a[i] = a[i] + 1;
+       b[i] = d[i];
+       c[i] = a[i] / d[i];
+     }
+ }
+ int main()
+ {
+   int i;
+   for (i = 0; i < 128; ++i)
+     a[i] = i;
+   for (i = 0; i < 128; ++i)
+     d[i] = 1;
+   foo ();
+   if (c[0] != 1)
+     abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump "split to 2 loops" "ldist" } } */
+ /* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */

Reply via email to