Hi, unfortunately, I have missed two execution failures when using HSA (teams-6.f90 and target1.f90), both of which are caused by not handling the simd part of a combined target teams distribute parallel for simd construct. I have not really thought through how exactly GPUs should treat the simd construct in a combined construct, but of course we must not miscompile.
So I'd like to commit the following patch which just disallows gridification of such cases to trunk now and am working on a fix that removes the simd loop for the hsa branch. Eventually we might also want to hsa-vectorize the body even though the explicit loop is missing, but that will also mean that the HSA grid size has to shrink appropriately. Bootstrapped and tested on x86_64-linux, with and without HSA enabled. OK for trunk? Thanks, Martin 2016-02-17 Martin Jambor <mjam...@suse.cz> * omp-low.c (grid_find_ungridifiable_statement): Store problematic statements to wi->info. Also disallow omp simd constructs. (grid_target_follows_gridifiable_pattern): Use wi.info to dump reason for not gridifying. Dump special string for omp_for. --- gcc/omp-low.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index fcbb3e0..989d03e 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -17241,7 +17241,7 @@ grid_find_single_omp_among_assignments (gimple_seq seq, location_t target_loc, static tree grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, bool *handled_ops_p, - struct walk_stmt_info *) + struct walk_stmt_info *wi) { *handled_ops_p = false; gimple *stmt = gsi_stmt (*gsi); @@ -17251,6 +17251,7 @@ grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) { *handled_ops_p = true; + wi->info = stmt; return error_mark_node; } break; @@ -17266,8 +17267,19 @@ grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, case GIMPLE_OMP_TARGET: case GIMPLE_OMP_ORDERED: *handled_ops_p = true; + wi->info = stmt; return error_mark_node; + case GIMPLE_OMP_FOR: + if ((gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD) + && gimple_omp_for_combined_into_p (stmt)) + { + *handled_ops_p = true; + wi->info = stmt; + return error_mark_node; + } + break; + default: break; } @@ -17509,10 +17521,11 @@ grid_target_follows_gridifiable_pattern (gomp_target *target, 
tree *group_size_p struct walk_stmt_info wi; memset (&wi, 0, sizeof (wi)); - if (gimple *bad = walk_gimple_seq (gimple_omp_body (gfor), - grid_find_ungridifiable_statement, - NULL, &wi)) + if (walk_gimple_seq (gimple_omp_body (gfor), + grid_find_ungridifiable_statement, + NULL, &wi)) { + gimple *bad = (gimple *) wi.info; if (dump_enabled_p ()) { if (is_gimple_call (bad)) @@ -17520,6 +17533,11 @@ grid_target_follows_gridifiable_pattern (gomp_target *target, tree *group_size_p "Will not turn target construct into a gridified " " GPGPU kernel because the inner loop contains " "call to a noreturn function\n"); + else if (gimple_code (bad) == GIMPLE_OMP_FOR) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified " + " GPGPU kernel because the inner loop contains " + "a simd construct\n"); else dump_printf_loc (MSG_NOTE, tloc, "Will not turn target construct into a gridified " -- 2.7.1