On 05/24/2011 10:21 PM, Sebastian Pop wrote:
Hi Alexey,
Thanks again for the cleanup patch.
While I was testing the change, I found that some flag combinations
cause an internal compiler error (ICE). Could you please have
a look at this PR: http://gcc.gnu.org/PR49147
Thanks,
Sebastian
Hi Sebastian,
Here is the patch for this issue.
It is caused by a bug in opencl_get_perfect_nested_loop_depth.
This function ignores the -fgraphite-opencl-ignore-dep flag, while
opencl_transform_stmt_list takes this flag into account.
This patch adds a new function, enabled_dependency_in_clast_loop_p, which should
be used to check for dependencies in graphite-opencl. I tested it on PolyKernels
and on the block-0.c test from the graphite testsuite.
Is this patch OK for the graphite branch? If it is, could you commit it?
gcc:
2011-05-26 Alexey Kravets <[email protected]>
PR middle-end/49147
* graphite-opencl-codegen.c (opencl_get_perfect_nested_loop_depth):
Use enabled_dependency_in_clast_loop_p instead of
dependency_in_clast_loop_p.
* graphite-opencl.c (opencl_transform_stmt_list): Ditto.
(enabled_dependency_in_clast_loop_p): New.
* graphite-opencl.h (enabled_dependency_in_clast_loop_p): Export.
testsuite:
2011-05-26 Alexey Kravets <[email protected]>
PR middle-end/49147
* gcc.dg/graphite/pr49147.c: New test.
--
Alexey Kravets
diff --git a/gcc/graphite-opencl-codegen.c b/gcc/graphite-opencl-codegen.c
index 891fef0..67588df 100644
--- a/gcc/graphite-opencl-codegen.c
+++ b/gcc/graphite-opencl-codegen.c
@@ -280,7 +280,7 @@ opencl_get_perfect_nested_loop_depth (opencl_main code_gen,
{
struct clast_for *child;
- if (dependency_in_clast_loop_p (code_gen, meta, loop, depth))
+ if (enabled_dependency_in_clast_loop_p (code_gen, meta, loop, depth))
return 0;
child = opencl_get_single_loop_child (loop);
diff --git a/gcc/graphite-opencl.c b/gcc/graphite-opencl.c
index 53704d2..a4ce5aa 100644
--- a/gcc/graphite-opencl.c
+++ b/gcc/graphite-opencl.c
@@ -2144,9 +2144,8 @@ opencl_transform_stmt_list (struct clast_stmt *s, opencl_main code_gen,
bool parallel = false;
/* If there are dependencies in loop, it can't be parallelized. */
- if (!flag_graphite_opencl_no_dep_check &&
- dependency_in_clast_loop_p (code_gen, current_clast,
- for_stmt, depth))
+ if (enabled_dependency_in_clast_loop_p (code_gen, current_clast,
+ for_stmt, depth))
{
if (dump_p)
fprintf (dump_file, "dependency in loop\n");
@@ -2609,6 +2608,20 @@ dependency_in_clast_loop_p (opencl_main code_gen, opencl_clast_meta meta,
return false;
}
+/* Returns true if dependency checking is enabled and there is a dependency in
+ clast loop STMT at depth DEPTH.
+ CODE_GEN holds information related to code generation. */
+
+bool
+enabled_dependency_in_clast_loop_p (opencl_main code_gen,
+ opencl_clast_meta meta,
+ struct clast_for *stmt, int depth)
+{
+ if (flag_graphite_opencl_no_dep_check)
+ return false;
+ return dependency_in_clast_loop_p (code_gen, meta, stmt, depth);
+}
+
/* Init graphite-opencl pass. Must be called in each function before
any scop processing. */
diff --git a/gcc/graphite-opencl.h b/gcc/graphite-opencl.h
index dd047c9..1fab592 100644
--- a/gcc/graphite-opencl.h
+++ b/gcc/graphite-opencl.h
@@ -251,4 +251,6 @@ extern bool opencl_should_be_parallel_p (opencl_main, opencl_clast_meta, int);
extern basic_block opencl_create_bb (opencl_main);
extern bool dependency_in_clast_loop_p (opencl_main, opencl_clast_meta,
struct clast_for *, int);
+extern bool enabled_dependency_in_clast_loop_p (opencl_main, opencl_clast_meta,
+ struct clast_for *, int);
extern tree opencl_get_base_object_by_tree (tree);
diff --git a/gcc/testsuite/gcc.dg/graphite/pr49147.c b/gcc/testsuite/gcc.dg/graphite/pr49147.c
new file mode 100644
index 0000000..d5f5f30
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/pr49147.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-fgraphite-opencl -fgraphite-opencl-ignore-dep -g -Ofast" } */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+#define N 1000
+int a[N];
+
+static int __attribute__((noinline))
+foo (void)
+{
+ int j;
+ int i;
+
+ /* This is not blocked as it is not profitable. */
+ for (i = 0; i < N; i++)
+ for (j = 0; j < N; j++)
+ a[j] = a[i] + 1;
+
+ return a[0];
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+ int i, res;
+
+ for (i = 0; i < N; i++)
+ a[i] = i;
+
+ res = foo ();
+
+#if DEBUG
+ fprintf (stderr, "res = %d \n", res);
+#endif
+
+ if (res != 1999)
+ abort ();
+
+ return 0;
+}