In developing a non-unity default partition mechanism, I discovered there was no
reliable way to determine whether an offload function was for a kernels region
or not. The tree-ssa pass uses a heuristic that is sufficient for its needs, but
it is not very clear.
This patch adjusts set_oacc_fn_attrib to accept an 'is_kernel' parameter, which
it encodes in the TREE_PUBLIC flag of the attribute values. I add an
oacc_fn_attrib_kernels_p predicate and use it where needed.
(The defaulting mechanism needs to reliably distinguish kernels from parallel
offload regions.)
nathan
2015-12-29 Nathan Sidwell <nat...@acm.org>
* omp-low.c (set_oacc_fn_attrib): Add IS_KERNEL arg, encode on
TREE_PUBLIC.
(oacc_fn_attrib_kernels_p): New.
(oacc_fn_attrib_level): New.
(expand_omp_target): Pass kernels_p to set_oacc_fn_attrib.
(oacc_validate_dims): Add LEVEL arg, don't return it.
(new_oacc_loop_routine): Use oacc_fn_attrib_level, not
oacc_validate_dims.
(execute_oacc_device_lower): Use oacc_fn_attrib_level, validate
dimensions after discovering loops. Add more dump info.
* omp-low.h (set_oacc_fn_attrib): Add IS_KERNEL arg.
(oacc_fn_attrib_kernels_p): Declare.
* tree-parloops.c (create_parallel_loop): Adjust
set_oacc_fn_attrib call.
* tree-ssa-loop.c (gate_oacc_kernels): Use oacc_fn_attrib_kernels_p.
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c (revision 231992)
+++ gcc/omp-low.c (working copy)
@@ -12625,10 +12625,11 @@ replace_oacc_fn_attrib (tree fn, tree di
/* Scan CLAUSES for launch dimensions and attach them to the oacc
function attribute. Push any that are non-constant onto the ARGS
- list, along with an appropriate GOMP_LAUNCH_DIM tag. */
+ list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is
+ true, if these are for a kernels region offload function. */
void
-set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
+set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
{
/* Must match GOMP_DIM ordering. */
static const omp_clause_code ids[]
@@ -12653,6 +12654,9 @@ set_oacc_fn_attrib (tree fn, tree clause
non_const |= GOMP_DIM_MASK (ix);
}
attr = tree_cons (NULL_TREE, dim, attr);
+ /* Note kernelness with TREE_PUBLIC. */
+ if (is_kernel)
+ TREE_PUBLIC (attr) = 1;
}
replace_oacc_fn_attrib (fn, attr);
@@ -12721,6 +12725,36 @@ get_oacc_fn_attrib (tree fn)
return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
}
+/* Return true if this oacc fn attrib is for a kernels offload
+ region. We use the TREE_PUBLIC flag of each dimension -- only
+ need to check the first one. */
+
+bool
+oacc_fn_attrib_kernels_p (tree attr)
+{
+ return TREE_PUBLIC (TREE_VALUE (attr));
+}
+
+/* Return level at which oacc routine may spawn a partitioned loop, or
+ -1 if it is not a routine (i.e. is an offload fn). */
+
+int
+oacc_fn_attrib_level (tree attr)
+{
+ tree pos = TREE_VALUE (attr);
+
+ if (!TREE_PURPOSE (pos))
+ return -1;
+
+ int ix = 0;
+ for (ix = 0; ix != GOMP_DIM_MAX;
+ ix++, pos = TREE_CHAIN (pos))
+ if (!integer_zerop (TREE_PURPOSE (pos)))
+ break;
+
+ return ix;
+}
+
/* Extract an oacc execution dimension from FN. FN must be an
offloaded function or routine that has already had its execution
dimensions lowered to the target-specific values. */
@@ -13045,6 +13079,7 @@ expand_omp_target (struct omp_region *re
enum built_in_function start_ix;
location_t clause_loc;
unsigned int flags_i = 0;
+ bool oacc_kernels_p = false;
switch (gimple_omp_target_kind (entry_stmt))
{
@@ -13064,8 +13099,10 @@ expand_omp_target (struct omp_region *re
start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
break;
- case GF_OMP_TARGET_KIND_OACC_PARALLEL:
case GF_OMP_TARGET_KIND_OACC_KERNELS:
+ oacc_kernels_p = true;
+ /* FALLTHROUGH */
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL:
start_ix = BUILT_IN_GOACC_PARALLEL;
break;
case GF_OMP_TARGET_KIND_OACC_DATA:
@@ -13247,7 +13284,7 @@ expand_omp_target (struct omp_region *re
break;
case BUILT_IN_GOACC_PARALLEL:
{
- set_oacc_fn_attrib (child_fn, clauses, &args);
+ set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
tagging = true;
}
/* FALLTHRU */
@@ -19259,17 +19296,17 @@ oacc_xform_loop (gcall *call)
}
/* Validate and update the dimensions for offloaded FN. ATTRS is the
- raw attribute. DIMS is an array of dimensions, which is returned.
- Returns the function level dimensionality -- the level at which an
- offload routine wishes to partition a loop. */
+ raw attribute. DIMS is an array of dimensions, which is filled in.
+ LEVEL is the partitioning level of a routine, or -1 for an offload
+ region itself. */
-static int
-oacc_validate_dims (tree fn, tree attrs, int *dims)
+static void
+oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
{
tree purpose[GOMP_DIM_MAX];
unsigned ix;
tree pos = TREE_VALUE (attrs);
- int fn_level = -1;
+ bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
/* Make sure the attribute creator attached the dimension
information. */
@@ -19278,21 +19315,12 @@ oacc_validate_dims (tree fn, tree attrs,
for (ix = 0; ix != GOMP_DIM_MAX; ix++)
{
purpose[ix] = TREE_PURPOSE (pos);
-
- if (purpose[ix])
- {
- if (integer_zerop (purpose[ix]))
- fn_level = ix + 1;
- else if (fn_level < 0)
- fn_level = ix;
- }
-
tree val = TREE_VALUE (pos);
dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
pos = TREE_CHAIN (pos);
}
- bool changed = targetm.goacc.validate_dims (fn, dims, fn_level);
+ bool changed = targetm.goacc.validate_dims (fn, dims, level);
/* Default anything left to 1. */
for (ix = 0; ix != GOMP_DIM_MAX; ix++)
@@ -19307,13 +19335,15 @@ oacc_validate_dims (tree fn, tree attrs,
/* Replace the attribute with new values. */
pos = NULL_TREE;
for (ix = GOMP_DIM_MAX; ix--;)
- pos = tree_cons (purpose[ix],
- build_int_cst (integer_type_node, dims[ix]),
- pos);
+ {
+ pos = tree_cons (purpose[ix],
+ build_int_cst (integer_type_node, dims[ix]),
+ pos);
+ if (is_kernel)
+ TREE_PUBLIC (pos) = 1;
+ }
replace_oacc_fn_attrib (fn, pos);
}
-
- return fn_level;
}
/* Create an empty OpenACC loop structure at LOC. */
@@ -19385,7 +19415,7 @@ new_oacc_loop_routine (oacc_loop *parent
{
oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
int dims[GOMP_DIM_MAX];
- int level = oacc_validate_dims (decl, attrs, dims);
+ int level = oacc_fn_attrib_level (attrs);
gcc_assert (level >= 0);
@@ -20015,13 +20045,30 @@ execute_oacc_device_lower ()
return TODO_discard_function;
}
- int dims[GOMP_DIM_MAX];
- int fn_level = oacc_validate_dims (current_function_decl, attr, dims);
-
/* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery ();
+ int fn_level = oacc_fn_attrib_level (attr);
+
+ if (dump_file)
+ fprintf (dump_file, oacc_fn_attrib_kernels_p (attr)
+ ? "Function is kernels offload\n"
+ : fn_level < 0 ? "Function is parallel offload\n"
+ : "Function is routine level %d\n", fn_level);
+
unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
oacc_loop_partition (loops, outer_mask);
+
+ int dims[GOMP_DIM_MAX];
+ oacc_validate_dims (current_function_decl, attr, dims, fn_level);
+
+ if (dump_file)
+ {
+ const char *comma = "Compute dimensions [";
+ for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
+ fprintf (dump_file, "%s%d", comma, dims[ix]);
+ fprintf (dump_file, "]\n");
+ }
+
oacc_loop_process (loops);
if (dump_file)
{
Index: gcc/omp-low.h
===================================================================
--- gcc/omp-low.h (revision 231992)
+++ gcc/omp-low.h (working copy)
@@ -33,7 +33,8 @@ extern tree omp_member_access_dummy_var
extern void replace_oacc_fn_attrib (tree, tree);
extern tree build_oacc_routine_dims (tree);
extern tree get_oacc_fn_attrib (tree);
-extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
+extern bool oacc_fn_attrib_kernels_p (tree);
+extern void set_oacc_fn_attrib (tree, tree, bool, vec<tree> *);
extern int get_oacc_ifn_dim_arg (const gimple *);
extern int get_oacc_fn_dim_size (tree, int);
Index: gcc/tree-parloops.c
===================================================================
--- gcc/tree-parloops.c (revision 231992)
+++ gcc/tree-parloops.c (working copy)
@@ -2054,7 +2054,7 @@ create_parallel_loop (struct loop *loop,
tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
OMP_CLAUSE_NUM_GANGS_EXPR (clause)
= build_int_cst (integer_type_node, n_threads);
- set_oacc_fn_attrib (cfun->decl, clause, NULL);
+ set_oacc_fn_attrib (cfun->decl, clause, true, NULL);
}
/* Initialize NEW_DATA. */
Index: gcc/tree-ssa-loop.c
===================================================================
--- gcc/tree-ssa-loop.c (revision 231992)
+++ gcc/tree-ssa-loop.c (working copy)
@@ -154,12 +154,7 @@ gate_oacc_kernels (function *fn)
tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
if (oacc_function_attr == NULL_TREE)
return false;
-
- tree val = TREE_VALUE (oacc_function_attr);
- while (val != NULL_TREE && TREE_VALUE (val) == NULL_TREE)
- val = TREE_CHAIN (val);
-
- if (val != NULL_TREE)
+ if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
return false;
struct loop *loop;