The following patch "fixes" the bogus loop distribution that breaks 434.zeusmp by applying the cost model for -ftree-loop-distribute-patterns as well (the bug does not reproduce with -O3 -ftree-loop-distribution). It only papers over the real issue, but it is also part of the real fix I am developing (a missing feature I have put off for too long). Since it unbreaks 434.zeusmp, I'll apply it ahead of the real fix, which, while complete, still needs some TLC and compile-time work.
Bootstrap and regtest running on x86_64-unknown-linux-gnu, I'll double-check SPEC 2k6 before committing this as well. Thanks, Richard. 2013-09-20 Richard Biener <rguent...@suse.de> PR tree-optimization/58453 * tree-loop-distribution.c (distribute_loop): Apply the cost model for -ftree-loop-distribute-patterns, too. * gcc.dg/tree-ssa/ldist-23.c: New testcase. Index: gcc/tree-loop-distribution.c =================================================================== *** gcc/tree-loop-distribution.c (revision 202774) --- gcc/tree-loop-distribution.c (working copy) *************** distribute_loop (struct loop *loop, vec< *** 1514,1531 **** any_builtin |= partition_builtin_p (partition); } /* If we are only distributing patterns fuse all partitions that ! were not properly classified as builtins. Else fuse partitions ! with similar memory accesses. */ if (!flag_tree_loop_distribution) { partition_t into; - /* If we did not detect any builtin simply bail out. */ - if (!any_builtin) - { - nbp = 0; - goto ldist_done; - } /* Only fuse adjacent non-builtin partitions, see PR53616. ??? Use dependence information to improve partition ordering. */ i = 0; --- 1514,1564 ---- any_builtin |= partition_builtin_p (partition); } + /* If we did not detect any builtin but are not asked to apply + regular loop distribution simply bail out. */ + if (!flag_tree_loop_distribution + && !any_builtin) + { + nbp = 0; + goto ldist_done; + } + + /* Apply our simple cost model - fuse partitions with similar + memory accesses. 
*/ + partition_t into; + for (i = 0; partitions.iterate (i, &into); ++i) + { + if (partition_builtin_p (into)) + continue; + for (int j = i + 1; + partitions.iterate (j, &partition); ++j) + { + if (!partition_builtin_p (partition) + && similar_memory_accesses (rdg, into, partition)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "fusing partitions\n"); + dump_bitmap (dump_file, into->stmts); + dump_bitmap (dump_file, partition->stmts); + fprintf (dump_file, "because they have similar " + "memory accesses\n"); + } + bitmap_ior_into (into->stmts, partition->stmts); + if (partition->kind == PKIND_REDUCTION) + into->kind = PKIND_REDUCTION; + partitions.ordered_remove (j); + partition_free (partition); + j--; + } + } + } + /* If we are only distributing patterns fuse all partitions that ! were not properly classified as builtins. */ if (!flag_tree_loop_distribution) { partition_t into; /* Only fuse adjacent non-builtin partitions, see PR53616. ??? Use dependence information to improve partition ordering. 
*/ i = 0; *************** distribute_loop (struct loop *loop, vec< *** 1549,1586 **** } while ((unsigned) i < partitions.length ()); } - else - { - partition_t into; - int j; - for (i = 0; partitions.iterate (i, &into); ++i) - { - if (partition_builtin_p (into)) - continue; - for (j = i + 1; - partitions.iterate (j, &partition); ++j) - { - if (!partition_builtin_p (partition) - && similar_memory_accesses (rdg, into, partition)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "fusing partitions\n"); - dump_bitmap (dump_file, into->stmts); - dump_bitmap (dump_file, partition->stmts); - fprintf (dump_file, "because they have similar " - "memory accesses\n"); - } - bitmap_ior_into (into->stmts, partition->stmts); - if (partition->kind == PKIND_REDUCTION) - into->kind = PKIND_REDUCTION; - partitions.ordered_remove (j); - partition_free (partition); - j--; - } - } - } - } /* Fuse all reduction partitions into the last. */ if (partitions.length () > 1) --- 1582,1587 ---- Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c (revision 0) --- gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c (working copy) *************** *** 0 **** --- 1,34 ---- + /* { dg-do run } */ + /* { dg-options "-O3 -fdump-tree-ldist-details" } */ + + extern void abort (void); + + int a[128], b[128], c[128], d[128]; + + void __attribute__((noinline,noclone)) + foo (void) + { + int i; + for (i = 0; i < 128; ++i) + { + a[i] = a[i] + 1; + b[i] = d[i]; + c[i] = a[i] / d[i]; + } + } + int main() + { + int i; + for (i = 0; i < 128; ++i) + a[i] = i; + for (i = 0; i < 128; ++i) + d[i] = 1; + foo (); + if (c[0] != 1) + abort (); + return 0; + } + + /* { dg-final { scan-tree-dump "split to 2 loops" "ldist" } } */ + /* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */ + /* { dg-final { cleanup-tree-dump "ldist" } } */