Hi Tom! On Mon, 12 Oct 2015 18:56:29 +0200, Tom de Vries <tom_devr...@mentor.com> wrote: > Handle original loop tree in expand_omp_for_generic > > 2015-09-12 Tom de Vries <t...@codesourcery.com> > > PR tree-optimization/67476 > * omp-low.c (expand_omp_for_generic): Handle original loop tree.
Working on a merge from trunk into gomp-4_0-branch, I'm seeing your change (trunk r228754) conflict with code Chung-Lin changed (gomp-4_0-branch r224505). So, would you two please cherry-pick/merge trunk r228754 into gomp-4_0-branch? Thanks! (I'm assuming you can easily tell what needs to be done here; it's been a long time since Chung-Lin touched this code, so CCing him just in case.) Thanks! Chung-Lin's gomp-4_0-branch r224505: commit 5f9849b7f0723d06fcd18a18e0880d4df75da92a Author: cltang <cltang@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue Jun 16 08:59:01 2015 +0000 2015-06-16 Chung-Lin Tang <clt...@codesourcery.com> * omp-low.c (struct omp_region): Add inside_kernels_p field. (expand_omp_for_generic): Adjust to generate a 'sequential' loop when GOMP builtin arguments are BUILT_IN_NONE. (expand_omp_for): Use expand_omp_for_generic() to generate a non-parallelized loop for OMP_FORs inside OpenACC kernels regions. (expand_omp): Mark inside_kernels_p field true for regions nested inside OpenACC kernels constructs. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@224505 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index be09b0f..6fa08da 100644 --- gcc/ChangeLog.gomp +++ gcc/ChangeLog.gomp @@ -1,3 +1,13 @@ +2015-06-16 Chung-Lin Tang <clt...@codesourcery.com> + + * omp-low.c (struct omp_region): Add inside_kernels_p field. + (expand_omp_for_generic): Adjust to generate a 'sequential' loop + when GOMP builtin arguments are BUILT_IN_NONE. + (expand_omp_for): Use expand_omp_for_generic() to generate a + non-parallelized loop for OMP_FORs inside OpenACC kernels regions. + (expand_omp): Mark inside_kernels_p field true for regions + nested inside OpenACC kernels constructs. 
+ 2015-06-15 Cesar Philippidis <ce...@codesourcery.com> * omp-low.c (expand_omp_for_static_nochunk): Update entry_bb after diff --git gcc/omp-low.c gcc/omp-low.c index c7451c9..a3dab12 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -161,6 +161,9 @@ struct omp_region /* True if this is a combined parallel+workshare region. */ bool is_combined_parallel; + /* True if this is nested inside an OpenACC kernels construct. */ + bool inside_kernels_p; + /* For an OpenACC loop, the level of parallelism requested. */ int gwv_this; @@ -6862,6 +6865,7 @@ expand_omp_for_generic (struct omp_region *region, gassign *assign_stmt; bool in_combined_parallel = is_combined_parallel (region); bool broken_loop = region->cont == NULL; + bool seq_loop = (!start_fn || !next_fn); edge e, ne; tree *counts = NULL; int i; @@ -6949,7 +6953,20 @@ expand_omp_for_generic (struct omp_region *region, zero_iter_bb)); } } - if (in_combined_parallel) + if (seq_loop) + { + tree n1 = fold_convert (fd->iter_type, fd->loop.n1); + tree n2 = fold_convert (fd->iter_type, fd->loop.n2); + + assign_stmt = gimple_build_assign (istart0, n1); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + assign_stmt = gimple_build_assign (iend0, n2); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + t = fold_build2 (NE_EXPR, boolean_type_node, istart0, iend0); + } + else if (in_combined_parallel) { /* In a combined parallel loop, emit a call to GOMP_loop_foo_next. */ @@ -7135,32 +7152,38 @@ expand_omp_for_generic (struct omp_region *region, collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); /* Emit code to get the next parallel iteration in L2_BB. 
*/ - gsi = gsi_start_bb (l2_bb); + if (!seq_loop) + { + gsi = gsi_start_bb (l2_bb); - t = build_call_expr (builtin_decl_explicit (next_fn), 2, - build_fold_addr_expr (istart0), - build_fold_addr_expr (iend0)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (TREE_TYPE (t) != boolean_type_node) - t = fold_build2 (NE_EXPR, boolean_type_node, - t, build_int_cst (TREE_TYPE (t), 0)); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + t = build_call_expr (builtin_decl_explicit (next_fn), 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + } } /* Add the loop cleanup function. 
*/ gsi = gsi_last_bb (exit_bb); - if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); - else if (gimple_omp_return_lhs (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); - else - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); - gcall *call_stmt = gimple_build_call (t, 0); - if (gimple_omp_return_lhs (gsi_stmt (gsi))) - gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); - gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); + if (!seq_loop) + { + if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); + else if (gimple_omp_return_lhs (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); + else + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); + gcall *call_stmt = gimple_build_call (t, 0); + if (gimple_omp_return_lhs (gsi_stmt (gsi))) + gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); + gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); + } gsi_remove (&gsi, true); /* Connect the new blocks. */ @@ -7172,7 +7195,7 @@ expand_omp_for_generic (struct omp_region *region, gimple_seq phis; e = find_edge (cont_bb, l3_bb); - ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); + ne = make_edge (l2_bb, l3_bb, seq_loop ? 
EDGE_FALLTHRU : EDGE_FALSE_VALUE); phis = phi_nodes (l3_bb); for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) @@ -7208,7 +7231,8 @@ expand_omp_for_generic (struct omp_region *region, e = find_edge (cont_bb, l2_bb); e->flags = EDGE_FALLTHRU; } - make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); + if (!seq_loop) + make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); set_immediate_dominator (CDI_DOMINATORS, l2_bb, recompute_dominator (CDI_DOMINATORS, l2_bb)); @@ -7219,10 +7243,16 @@ expand_omp_for_generic (struct omp_region *region, set_immediate_dominator (CDI_DOMINATORS, l1_bb, recompute_dominator (CDI_DOMINATORS, l1_bb)); - struct loop *outer_loop = alloc_loop (); - outer_loop->header = l0_bb; - outer_loop->latch = l2_bb; - add_loop (outer_loop, l0_bb->loop_father); + struct loop *outer_loop; + if (seq_loop) + outer_loop = l0_bb->loop_father; + else + { + outer_loop = alloc_loop (); + outer_loop->header = l0_bb; + outer_loop->latch = l2_bb; + add_loop (outer_loop, l0_bb->loop_father); + } if (!gimple_omp_for_combined_p (fd->for_stmt)) { @@ -8704,7 +8734,10 @@ expand_omp_for (struct omp_region *region, gimple inner_stmt) original loops from being detected. Fix that up. 
*/ loops_state_set (LOOPS_NEED_FIXUP); - if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) + if (region->inside_kernels_p) + expand_omp_for_generic (region, &fd, BUILT_IN_NONE, BUILT_IN_NONE, + inner_stmt); + else if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) expand_omp_simd (region, &fd); else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR) expand_cilk_for (region, &fd); @@ -10296,6 +10329,14 @@ expand_omp (struct omp_region *region) if (region->type == GIMPLE_OMP_PARALLEL) determine_parallel_type (region); + if (region->type == GIMPLE_OMP_TARGET && region->inner) + { + gomp_target *entry = as_a <gomp_target *> (last_stmt (region->entry)); + if (region->inside_kernels_p + || gimple_omp_target_kind (entry) == GF_OMP_TARGET_KIND_OACC_KERNELS) + region->inner->inside_kernels_p = true; + } + if (region->type == GIMPLE_OMP_FOR && gimple_omp_for_combined_p (last_stmt (region->entry))) inner_stmt = last_stmt (region->inner->entry); Tom's trunk r228754: commit 1c6a437bd44020c37452b7fb4f565f7e7f94d56b Author: vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue Oct 13 10:08:40 2015 +0000 Handle original loop tree in expand_omp_for_generic 2015-10-13 Tom de Vries <t...@codesourcery.com> PR tree-optimization/67476 * omp-low.c (expand_omp_for_generic): Handle original loop tree. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228754 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git gcc/ChangeLog gcc/ChangeLog index e5ede0b..4632387 100644 --- gcc/ChangeLog +++ gcc/ChangeLog @@ -1,3 +1,8 @@ +2015-10-13 Tom de Vries <t...@codesourcery.com> + + PR tree-optimization/67476 + * omp-low.c (expand_omp_for_generic): Handle original loop tree. 
+ 2015-10-13 Richard Biener <rguent...@suse.de> * tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Allocate diff --git gcc/omp-low.c gcc/omp-low.c index b2a93b9..7e894e4 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -6439,7 +6439,6 @@ expand_omp_for_generic (struct omp_region *region, remove_edge (e); make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); - add_bb_to_loop (l2_bb, cont_bb->loop_father); e = find_edge (cont_bb, l1_bb); if (e == NULL) { @@ -6516,17 +6515,30 @@ expand_omp_for_generic (struct omp_region *region, set_immediate_dominator (CDI_DOMINATORS, l1_bb, recompute_dominator (CDI_DOMINATORS, l1_bb)); - struct loop *outer_loop = alloc_loop (); - outer_loop->header = l0_bb; - outer_loop->latch = l2_bb; - add_loop (outer_loop, l0_bb->loop_father); + /* We enter expand_omp_for_generic with a loop. This original loop may + have its own loop struct, or it may be part of an outer loop struct + (which may be the fake loop). */ + struct loop *outer_loop = entry_bb->loop_father; + bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; - if (!gimple_omp_for_combined_p (fd->for_stmt)) + add_bb_to_loop (l2_bb, outer_loop); + + /* We've added a new loop around the original loop. Allocate the + corresponding loop struct. */ + struct loop *new_loop = alloc_loop (); + new_loop->header = l0_bb; + new_loop->latch = l2_bb; + add_loop (new_loop, outer_loop); + + /* Allocate a loop structure for the original loop unless we already + had one. */ + if (!orig_loop_has_loop_struct + && !gimple_omp_for_combined_p (fd->for_stmt)) { - struct loop *loop = alloc_loop (); - loop->header = l1_bb; + struct loop *orig_loop = alloc_loop (); + orig_loop->header = l1_bb; /* The loop may have multiple latches. 
*/ - add_loop (loop, outer_loop); + add_loop (orig_loop, new_loop); } } } The merge conflict looks as follows: set_immediate_dominator (CDI_DOMINATORS, l1_bb, recompute_dominator (CDI_DOMINATORS, l1_bb)); <<<<<<< HEAD struct loop *outer_loop; if (seq_loop) outer_loop = l0_bb->loop_father; else { outer_loop = alloc_loop (); outer_loop->header = l0_bb; outer_loop->latch = l2_bb; add_loop (outer_loop, l0_bb->loop_father); } ======= /* We enter expand_omp_for_generic with a loop. This original loop may have its own loop struct, or it may be part of an outer loop struct (which may be the fake loop). */ struct loop *outer_loop = entry_bb->loop_father; bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; >>>>>>> e2c514f0507fb1864c4eed5d691e47156be57b5b add_bb_to_loop (l2_bb, outer_loop); /* We've added a new loop around the original loop. Allocate the corresponding loop struct. */ struct loop *new_loop = alloc_loop (); new_loop->header = l0_bb; new_loop->latch = l2_bb; add_loop (new_loop, outer_loop); /* Allocate a loop structure for the original loop unless we already had one. */ if (!orig_loop_has_loop_struct && !gimple_omp_for_combined_p (fd->for_stmt)) { struct loop *orig_loop = alloc_loop (); orig_loop->header = l1_bb; /* The loop may have multiple latches. */ add_loop (orig_loop, new_loop); } } } Grüße, Thomas
signature.asc
Description: PGP signature