On 03/06/15 13:20, Richard Biener wrote:
> On Wed, 3 Jun 2015, Tom de Vries wrote:
>> On 22/04/15 09:39, Richard Biener wrote:
>>> Ehm. So why not simply add a flag to struct loop instead and set it
>>> during OMP region parsing/lowering?
>> Attached patch adds an in_oacc_kernels_region flag to struct loop, and uses
>> it. OK for gomp-4_0-branch?
> Works for me.
Committed as attached, with a minor fix to pass bootstrap.
Thanks,
- Tom
Add in_oacc_kernels_region field to struct loop
2015-06-03 Tom de Vries <t...@codesourcery.com>
* cfgloop.h (struct loop): Add in_oacc_kernels_region field.
* omp-low.c (mark_loops_in_oacc_kernels_region): New function.
(loop_get_oacc_kernels_region_entry): New function.
(expand_omp_target): Call mark_loops_in_oacc_kernels_region.
(loop_in_oacc_kernels_region_p): Remove function.
* omp-low.h (loop_in_oacc_kernels_region_p): Remove declaration.
(loop_get_oacc_kernels_region_entry): Declare.
* tree-parloops.c (parallelize_loops): Use in_oacc_kernels_region field and
loop_get_oacc_kernels_region_entry.
* tree-ssa-loop-ch.c (pass_ch_execute): Use in_oacc_kernels_region field.
---
gcc/cfgloop.h | 3 +
gcc/omp-low.c | 155 ++++++++++++++++++++-----------------------------
gcc/omp-low.h | 3 +-
gcc/tree-parloops.c | 7 ++-
gcc/tree-ssa-loop-ch.c | 2 +-
5 files changed, 73 insertions(+), 97 deletions(-)
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 1d84572..a3654d9 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -195,6 +195,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
/* True if we should try harder to vectorize this loop. */
bool force_vectorize;
+ /* True if the loop is part of an oacc kernels region. */
+ bool in_oacc_kernels_region;
+
/* For SIMD loops, this is a unique identifier of the loop, referenced
by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE
builtins. */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b1aa603..22a57af 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -9425,6 +9425,68 @@ oacc_alloc_broadcast_storage (omp_context *ctx)
TYPE_SIZE_UNIT (vull_type_node));
}
+/* Set the in_oacc_kernels_region flag on each loop whose header is inside the
+ kernels region starting at REGION_ENTRY and ending at REGION_EXIT. */
+
+static void
+mark_loops_in_oacc_kernels_region (basic_block region_entry,
+ basic_block region_exit)
+{
+ bitmap dominated_bitmap = BITMAP_GGC_ALLOC ();
+ bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
+ unsigned di;
+ basic_block bb;
+
+ bitmap_clear (dominated_bitmap);
+ bitmap_clear (excludes_bitmap);
+
+ /* Get all the blocks dominated by the region entry, which covers the entire
+ region. NOTE(review): no vec here is released -- confirm that is intended. */
+ vec<basic_block> dominated
+ = get_all_dominated_blocks (CDI_DOMINATORS, region_entry);
+ FOR_EACH_VEC_ELT (dominated, di, bb)
+ bitmap_set_bit (dominated_bitmap, bb->index);
+
+ /* Exclude the blocks dominated by the region exit: those are not part of the
+ region. With a NULL REGION_EXIT nothing is excluded. */
+ if (region_exit != NULL)
+ {
+ vec<basic_block> excludes
+ = get_all_dominated_blocks (CDI_DOMINATORS, region_exit);
+ FOR_EACH_VEC_ELT (excludes, di, bb)
+ bitmap_set_bit (excludes_bitmap, bb->index);
+ }
+
+ /* Mark the loops whose header is in the dominated set but not excluded. */
+ struct loop *loop;
+ FOR_EACH_LOOP (loop, 0)
+ if (bitmap_bit_p (dominated_bitmap, loop->header->index)
+ && !bitmap_bit_p (excludes_bitmap, loop->header->index))
+ loop->in_oacc_kernels_region = true;
+}
+
+/* Return the entry block of LOOP's oacc kernels region; NULL if not in one. */
+
+basic_block
+loop_get_oacc_kernels_region_entry (struct loop *loop)
+{
+ if (!loop->in_oacc_kernels_region)
+ return NULL;
+
+ basic_block bb = loop->header;
+ while (true)
+ {
+ bb = get_immediate_dominator (CDI_DOMINATORS, bb);
+ gcc_assert (bb != NULL);
+
+ gimple last = last_stmt (bb);
+ if (last != NULL
+ && gimple_code (last) == GIMPLE_OMP_TARGET
+ && gimple_omp_target_kind (last) == GF_OMP_TARGET_KIND_OACC_KERNELS)
+ return bb;
+ }
+}
+
/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
static void
@@ -9495,6 +9557,8 @@ expand_omp_target (struct omp_region *region)
as an optimization barrier. */
do_splitoff = false;
cfun->curr_properties &= ~PROP_gimple_eomp;
+
+ mark_loops_in_oacc_kernels_region (region->entry, region->exit);
}
else
{
@@ -15331,97 +15395,6 @@ gimple_stmt_omp_data_i_init_p (gimple stmt)
SSA_OP_DEF);
}
-/* Return true if LOOP is inside a kernels region. */
-
-bool
-loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry,
- basic_block *region_exit)
-{
- bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
- bitmap region_bitmap = BITMAP_GGC_ALLOC ();
- bitmap_clear (region_bitmap);
-
- if (region_entry != NULL)
- *region_entry = NULL;
- if (region_exit != NULL)
- *region_exit = NULL;
-
- basic_block bb;
- gimple last;
- FOR_EACH_BB_FN (bb, cfun)
- {
- if (bitmap_bit_p (region_bitmap, bb->index))
- continue;
-
- last = last_stmt (bb);
- if (!last)
- continue;
-
- if (gimple_code (last) != GIMPLE_OMP_TARGET
- || (gimple_omp_target_kind (last) != GF_OMP_TARGET_KIND_OACC_KERNELS))
- continue;
-
- bitmap_clear (excludes_bitmap);
- bitmap_set_bit (excludes_bitmap, bb->index);
-
- vec<basic_block> dominated
- = get_all_dominated_blocks (CDI_DOMINATORS, bb);
-
- unsigned di;
- basic_block dom;
-
- basic_block end_region = NULL;
- FOR_EACH_VEC_ELT (dominated, di, dom)
- {
- if (dom == bb)
- continue;
-
- last = last_stmt (dom);
- if (!last)
- continue;
-
- if (gimple_code (last) != GIMPLE_OMP_RETURN)
- continue;
-
- if (end_region == NULL
- || dominated_by_p (CDI_DOMINATORS, end_region, dom))
- end_region = dom;
- }
-
- if (end_region == NULL)
- {
- gimple kernels = last_stmt (bb);
- fatal_error (gimple_location (kernels),
- "End of kernel region unreachable");
- }
-
- vec<basic_block> excludes
- = get_all_dominated_blocks (CDI_DOMINATORS, end_region);
-
- unsigned di2;
- basic_block exclude;
-
- FOR_EACH_VEC_ELT (excludes, di2, exclude)
- if (exclude != end_region)
- bitmap_set_bit (excludes_bitmap, exclude->index);
-
- FOR_EACH_VEC_ELT (dominated, di, dom)
- if (!bitmap_bit_p (excludes_bitmap, dom->index))
- bitmap_set_bit (region_bitmap, dom->index);
-
- if (bitmap_bit_p (region_bitmap, loop->header->index))
- {
- if (region_entry != NULL)
- *region_entry = bb;
- if (region_exit != NULL)
- *region_exit = end_region;
- return true;
- }
- }
-
- return false;
-}
-
namespace {
const pass_data pass_data_late_lower_omp =
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index ae63c9f..fbc8416 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -29,8 +29,7 @@ extern tree omp_reduction_init (tree, tree);
extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
extern void omp_finish_file (void);
extern bool gimple_stmt_omp_data_i_init_p (gimple);
-extern bool loop_in_oacc_kernels_region_p (struct loop *, basic_block *,
- basic_block *);
+extern basic_block loop_get_oacc_kernels_region_entry (struct loop *);
extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars;
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 72877ee..e451704 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2629,7 +2629,7 @@ parallelize_loops (bool oacc_kernels_p)
struct obstack parloop_obstack;
HOST_WIDE_INT estimated;
source_location loop_loc;
- basic_block region_entry, region_exit;
+ basic_block region_entry = NULL;
/* Do not parallelize loops in the functions created by parallelization. */
if (parallelized_function_p (cfun->decl))
@@ -2649,8 +2649,7 @@ parallelize_loops (bool oacc_kernels_p)
if (oacc_kernels_p)
{
- if (!loop_in_oacc_kernels_region_p (loop, &region_entry,
- &region_exit))
+ if (!loop->in_oacc_kernels_region)
continue;
/* TODO: Allow nested loops. */
@@ -2661,6 +2660,8 @@ parallelize_loops (bool oacc_kernels_p)
fprintf (dump_file,
"Trying loop %d with header bb %d in oacc kernels region\n",
loop->num, loop->header->index);
+
+ region_entry = loop_get_oacc_kernels_region_entry (loop);
}
if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index 1cd77e6..7527efd 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -225,7 +225,7 @@ pass_ch_execute (function *fun, bool oacc_kernels_p)
continue;
if (oacc_kernels_p
- && !loop_in_oacc_kernels_region_p (loop, NULL, NULL))
+ && !loop->in_oacc_kernels_region)
continue;
/* Iterate the header copying up to limit; this takes care of the cases
--
1.9.1