On 03/02/2018 08:18 PM, Cesar Philippidis wrote:
> [...] introduces a new goacc adjust_parallelism target hook.

That's another separate patch.

Committed.

Thanks,
- Tom
[openacc] Add target hook TARGET_GOACC_ADJUST_PARALLELISM

2018-03-26  Cesar Philippidis  <ce...@codesourcery.com>
	    Tom de Vries  <t...@codesourcery.com>

	* doc/tm.texi.in: Add placeholder for TARGET_GOACC_ADJUST_PARALLELISM.
	* doc/tm.texi: Regenerate.
	* omp-offload.c (oacc_loop_fixed_partitions): Use the adjust_parallelism
	hook to modify this_mask.
	(oacc_loop_auto_partitions): Use the adjust_parallelism hook to modify
	this_mask and loop->mask.
	(default_goacc_adjust_parallelism): New function.
	* target.def (adjust_parallelism): New hook.
	* targhooks.h (default_goacc_adjust_parallelism): Declare.

---
 gcc/doc/tm.texi       |  6 ++++++
 gcc/doc/tm.texi.in    |  2 ++
 gcc/omp-offload.c     | 19 +++++++++++++++++++
 gcc/target.def        |  8 ++++++++
 gcc/targhooks.h       |  1 +
 5 files changed, 36 insertions(+)

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 0fcb9c6..271eb4d 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5883,6 +5883,12 @@ This hook should return the maximum size of a particular dimension,
 or zero if unbounded.
 @end deftypefn
 
+@deftypefn {Target Hook} unsigned TARGET_GOACC_ADJUST_PARALLELISM (unsigned @var{this_mask}, unsigned @var{outer_mask})
+This hook allows the accelerator compiler to remove any unused
+parallelism exposed in the current loop @var{THIS_MASK}, and the
+enclosing loop @var{OUTER_MASK}.  It returns an adjusted mask.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int *@var{dims}, bool @var{is_fork})
 This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN
 function calls to target-specific gimple, or indicate whether they
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 4187da1..fc73ad1 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4298,6 +4298,8 @@ address;  but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_GOACC_DIM_LIMIT
 
+@hook TARGET_GOACC_ADJUST_PARALLELISM
+
 @hook TARGET_GOACC_FORK_JOIN
 
 @hook TARGET_GOACC_REDUCTION
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index ba3f431..aa4de24 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -1258,6 +1258,13 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
 	}
     }
 
+  /* FIXME: Ideally, we should be coalescing parallelism here if the
+     hardware supports it.  E.g., instead of partitioning a loop
+     across worker and vector axes, sometimes the hardware can
+     execute those loops together without resorting to placing
+     extra thread barriers.  */
+  this_mask = targetm.goacc.adjust_parallelism (this_mask, outer_mask);
+
   mask_all |= this_mask;
 
   if (loop->flags & OLF_TILE)
@@ -1349,6 +1356,7 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
 	  this_mask ^= loop->e_mask;
 	}
 
+      this_mask = targetm.goacc.adjust_parallelism (this_mask, outer_mask);
       loop->mask |= this_mask;
     }
 
@@ -1396,7 +1404,9 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
 			" to parallelize element loop");
 	}
 
+      loop->mask = targetm.goacc.adjust_parallelism (loop->mask, outer_mask);
       loop->mask |= this_mask;
+
       if (!loop->mask && noisy)
 	warning_at (loop->loc, 0,
 		    tiling
@@ -1774,6 +1784,15 @@ default_goacc_dim_limit (int ARG_UNUSED (axis))
 #endif
 }
 
+/* Default adjustment of loop parallelism is not required.  */
+
+unsigned
+default_goacc_adjust_parallelism (unsigned this_mask,
+				  unsigned ARG_UNUSED (outer_mask))
+{
+  return this_mask;
+}
+
 namespace {
 
 const pass_data pass_data_oacc_device_lower =
diff --git a/gcc/target.def b/gcc/target.def
index b302d36..c878fee 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1697,6 +1697,14 @@ int, (int axis),
 default_goacc_dim_limit)
 
 DEFHOOK
+(adjust_parallelism,
+"This hook allows the accelerator compiler to remove any unused\n\
+parallelism exposed in the current loop @var{THIS_MASK}, and the\n\
+enclosing loop @var{OUTER_MASK}.  It returns an adjusted mask.",
+unsigned, (unsigned this_mask, unsigned outer_mask),
+default_goacc_adjust_parallelism)
+
+DEFHOOK
 (fork_join,
 "This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\
 function calls to target-specific gimple, or indicate whether they\n\
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 18070df..f4f6864 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -115,6 +115,7 @@ extern bool default_goacc_validate_dims (tree, int [], int);
 extern int default_goacc_dim_limit (int);
 extern bool default_goacc_fork_join (gcall *, const int [], bool);
 extern void default_goacc_reduction (gcall *);
+extern unsigned default_goacc_adjust_parallelism (unsigned, unsigned);
 
 /* These are here, and not in hooks.[ch], because not all users of
    hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */

Reply via email to