Hi, this patch deals with simd constructs in gridified OpenMP loops. Standalone simd constructs are handled by forcing the gridified copy to have an OMP_CLAUSE_SAFELEN_EXPR of one, while simd constructs that are part of a combined construct with the gridified parallel loop are simply discarded.
Committed to the branch, queued for merge to trunk soon. Thanks, Martin 2016-10-03 Martin Jambor <mjam...@suse.cz> * omp-low.c (grid_find_ungridifiable_statement): Do not bail out for simd loops. (grid_inner_loop_gridifiable_p): Likewise. (grid_process_grid_body): New function. (grid_eliminate_combined_simd_part): Likewise. (grid_mark_tiling_loops): Use it. Walk body of the loop with grid_process_grid_body. (grid_process_kernel_body_copy): Likewise. --- gcc/omp-low.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 122 insertions(+), 15 deletions(-) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 05015bd..a51474b 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -17478,17 +17478,6 @@ grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, *handled_ops_p = true; wi->info = stmt; return error_mark_node; - - case GIMPLE_OMP_FOR: - if ((gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD) - && gimple_omp_for_combined_into_p (stmt)) - { - *handled_ops_p = true; - wi->info = stmt; - return error_mark_node; - } - break; - default: break; } @@ -17614,10 +17603,6 @@ grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, GRID_MISSED_MSG_PREFIX "the inner loop contains " "call to a noreturn function\n"); - else if (gimple_code (bad) == GIMPLE_OMP_FOR) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop contains " - "a simd construct\n"); else dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, GRID_MISSED_MSG_PREFIX "the inner loop contains " @@ -18212,6 +18197,113 @@ grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, return NULL; } +/* Statement walker function to make adjustments to statements within the + gridifed kernel copy. 
*/ + +static tree +grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, + struct walk_stmt_info *) +{ + *handled_ops_p = false; + gimple *stmt = gsi_stmt (*gsi); + if (gimple_code (stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)) + { + gomp_for *loop = as_a <gomp_for *> (stmt); + tree clauses = gimple_omp_for_clauses (loop); + tree cl = find_omp_clause (clauses, OMP_CLAUSE_SAFELEN); + if (cl) + OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; + else + { + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); + OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; + OMP_CLAUSE_CHAIN (c) = clauses; + gimple_omp_for_set_clauses (loop, c); + } + } + return NULL_TREE; +} + +/* Given a PARLOOP that is a normal for looping construct but also a part of a + combined construct with a simd loop, eliminate the simd loop. */ + +static void +grid_eliminate_combined_simd_part (gomp_for *parloop) +{ + struct walk_stmt_info wi; + + memset (&wi, 0, sizeof (wi)); + wi.val_only = true; + enum gf_mask msk = GF_OMP_FOR_SIMD; + wi.info = (void *) &msk; + walk_gimple_seq (gimple_omp_body (parloop), find_combined_for, NULL, &wi); + gimple *stmt = (gimple *) wi.info; + /* We expect that the SIMD id the only statement in the parallel loop. */ + gcc_assert (stmt + && gimple_code (stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD) + && gimple_omp_for_combined_into_p (stmt) + && !gimple_omp_for_combined_p (stmt)); + gomp_for *simd = as_a <gomp_for *> (stmt); + + /* Copy over the iteration properties because the body refers to the index in + the bottmom-most loop. 
*/ + unsigned i, collapse = gimple_omp_for_collapse (parloop); + gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); + for (i = 0; i < collapse; i++) + { + gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); + gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); + gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); + gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); + } + + tree *tgt= gimple_omp_for_clauses_ptr (parloop); + while (*tgt) + tgt = &OMP_CLAUSE_CHAIN (*tgt); + + /* Copy over all clauses, except for linaer clauses, which are turned into + private clauses, and all other simd-specificl clauses, which are + ignored. */ + tree *pc = gimple_omp_for_clauses_ptr (simd); + while (*pc) + { + tree c = *pc; + switch (TREE_CODE (c)) + { + case OMP_CLAUSE_LINEAR: + { + tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); + OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); + OMP_CLAUSE_CHAIN (priv) = NULL; + *tgt = priv; + tgt = &OMP_CLAUSE_CHAIN (priv); + pc = &OMP_CLAUSE_CHAIN (c); + break; + } + + case OMP_CLAUSE_SAFELEN: + case OMP_CLAUSE_SIMDLEN: + case OMP_CLAUSE_ALIGNED: + pc = &OMP_CLAUSE_CHAIN (c); + break; + + default: + *pc = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = NULL; + *tgt = c; + tgt = &OMP_CLAUSE_CHAIN(c); + break; + } + } + + /* Finally, throw away the simd and mark the parallel loop as not + combined. */ + gimple_omp_set_body (parloop, gimple_omp_body (simd)); + gimple_omp_for_set_combined_p (parloop, false); +} + /* Statement walker function marking all parallels as grid_phony and loops as grid ones representing threads of a particular thread group. 
*/ @@ -18225,6 +18317,14 @@ grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, *handled_ops_p = true; gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); gimple_omp_for_set_grid_intra_group (loop, true); + if (gimple_omp_for_combined_p (loop)) + grid_eliminate_combined_simd_part (loop); + + struct walk_stmt_info body_wi; + memset (&body_wi, 0, sizeof (body_wi)); + walk_gimple_seq_mod (gimple_omp_body_ptr (loop), + grid_process_grid_body, NULL, &body_wi); + gbind *bind = (gbind *) wi_in->info; tree c; for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) @@ -18344,6 +18444,13 @@ grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, grid_copy_leading_local_assignments (prebody, dst, tgt_bind, GRID_SEGMENT_PRIVATE, wi); + if (gimple_omp_for_combined_p (inner_loop)) + grid_eliminate_combined_simd_part (inner_loop); + struct walk_stmt_info body_wi;; + memset (&body_wi, 0, sizeof (body_wi)); + walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), + grid_process_grid_body, NULL, &body_wi); + return inner_loop; } } -- 2.10.0