On 03/02/2018 08:18 PM, Cesar Philippidis wrote:
> [...] introduces a new goacc adjust_parallelism target hook.
That's another separate patch. Committed. Thanks, - Tom
[openacc] Add target hook TARGET_GOACC_ADJUST_PARALLELISM 2018-03-26 Cesar Philippidis <ce...@codesourcery.com> Tom de Vries <t...@codesourcery.com> * doc/tm.texi.in: Add placeholder for TARGET_GOACC_ADJUST_PARALLELISM. * doc/tm.texi: Regenerate. * omp-offload.c (oacc_loop_fixed_partitions): Use the adjust_parallelism hook to modify this_mask. (oacc_loop_auto_partitions): Use the adjust_parallelism hook to modify this_mask and loop->mask. (default_goacc_adjust_parallelism): New function. * target.def (adjust_parallelism): New hook. * targhooks.h (default_goacc_adjust_parallelism): Declare. --- gcc/doc/tm.texi | 6 ++++++ gcc/doc/tm.texi.in | 2 ++ gcc/omp-offload.c | 19 +++++++++++++++++++ gcc/target.def | 8 ++++++++ gcc/targhooks.h | 1 + 6 files changed, 49 insertions(+) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 0fcb9c6..271eb4d 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5883,6 +5883,12 @@ This hook should return the maximum size of a particular dimension, or zero if unbounded. @end deftypefn +@deftypefn {Target Hook} unsigned TARGET_GOACC_ADJUST_PARALLELISM (unsigned @var{this_mask}, unsigned @var{outer_mask}) +This hook allows the accelerator compiler to remove any unused +parallelism exposed in the current loop @var{THIS_MASK}, and the +enclosing loop @var{OUTER_MASK}. It returns an adjusted mask. +@end deftypefn + @deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int *@var{dims}, bool @var{is_fork}) This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN function calls to target-specific gimple, or indicate whether they diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 4187da1..fc73ad1 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4298,6 +4298,8 @@ address; but often a machine-dependent strategy can generate better code. 
@hook TARGET_GOACC_DIM_LIMIT +@hook TARGET_GOACC_ADJUST_PARALLELISM + @hook TARGET_GOACC_FORK_JOIN @hook TARGET_GOACC_REDUCTION diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index ba3f431..aa4de24 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -1258,6 +1258,13 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) } } + /* FIXME: Ideally, we should be coalescing parallelism here if the + hardware supports it. E.g. Instead of partitioning a loop + across worker and vector axes, sometimes the hardware can + execute those loops together without resorting to placing + extra thread barriers. */ + this_mask = targetm.goacc.adjust_parallelism (this_mask, outer_mask); + mask_all |= this_mask; if (loop->flags & OLF_TILE) @@ -1349,6 +1356,7 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask, this_mask ^= loop->e_mask; } + this_mask = targetm.goacc.adjust_parallelism (this_mask, outer_mask); loop->mask |= this_mask; } @@ -1396,7 +1404,9 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask, " to parallelize element loop"); } + loop->mask = targetm.goacc.adjust_parallelism (loop->mask, outer_mask); loop->mask |= this_mask; + if (!loop->mask && noisy) warning_at (loop->loc, 0, tiling @@ -1774,6 +1784,15 @@ default_goacc_dim_limit (int ARG_UNUSED (axis)) #endif } +/* Default adjustment of loop parallelism is not required. */ + +unsigned +default_goacc_adjust_parallelism (unsigned this_mask, + unsigned ARG_UNUSED (outer_mask)) +{ + return this_mask; +} + namespace { const pass_data pass_data_oacc_device_lower = diff --git a/gcc/target.def b/gcc/target.def index b302d36..c878fee 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1697,6 +1697,14 @@ int, (int axis), default_goacc_dim_limit) DEFHOOK +(adjust_parallelism, +"This hook allows the accelerator compiler to remove any unused\n\ +parallelism exposed in the current loop @var{THIS_MASK}, and the\n\ +enclosing loop @var{OUTER_MASK}. 
It returns an adjusted mask.", +unsigned, (unsigned this_mask, unsigned outer_mask), +default_goacc_adjust_parallelism) + +DEFHOOK (fork_join, "This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\ function calls to target-specific gimple, or indicate whether they\n\ diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 18070df..f4f6864 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -115,6 +115,7 @@ extern bool default_goacc_validate_dims (tree, int [], int); extern int default_goacc_dim_limit (int); extern bool default_goacc_fork_join (gcall *, const int [], bool); extern void default_goacc_reduction (gcall *); +extern unsigned default_goacc_adjust_parallelism (unsigned, unsigned); /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */