Hi, the patch below makes target_follows_kernelizable_pattern stricter by adding a few checks for clauses that must preclude kernelization. I have committed it to the branch.
Thanks, Martin 2015-09-02 Martin Jambor <mjam...@suse.cz> * omp-low.c (target_follows_kernelizable_pattern): Parallel num_thread clause and non-automatic loop schedule preclude kernelization. --- gcc/ChangeLog.hsa | 6 ++++++ gcc/omp-low.c | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 6c2bbe7..d6c521f 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -2832,9 +2832,23 @@ target_follows_kernelizable_pattern (gomp_target *target, tree *group_size_p, gomp_parallel *par; if (!stmt || !(par = dyn_cast <gomp_parallel *> (stmt))) return NULL; + + tree clauses = gimple_omp_parallel_clauses (par); + tree num_threads_clause = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS); + if (num_threads_clause) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "simple GPGPU kernel because there is a num_threads " + "clause of the parallel construct that " + "is likely to require looping \n"); + return NULL; + } + stmt = single_stmt_in_seq_skip_bind (gimple_omp_body (par), tloc, "parallel"); - /* FIXME: We are currently ignoring parallel clauses and potentially also - sharing clauses of teams and distribute, if there are any. We need to + /* FIXME: We are currently ignoring parallel sharing clauses and potentially + also sharing clauses of teams and distribute, if there are any. We need to check they can be skipped. 
*/ gomp_for *gfor; if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) @@ -2859,6 +2873,20 @@ target_follows_kernelizable_pattern (gomp_target *target, tree *group_size_p, return NULL; } + clauses = gimple_omp_for_clauses (gfor); + tree for_sched_clause = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE); + + if (for_sched_clause + && OMP_CLAUSE_SCHEDULE_KIND (for_sched_clause) != OMP_CLAUSE_SCHEDULE_AUTO) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a simple GPGPU " + "kernel because the inner loop has non-automatic " + "scheduling clause\n"); + return NULL; + } + if (teams) gather_inner_locals (gimple_omp_body (teams), kri); if (dist) -- 2.4.6