This patch, originally by Kwok, automatically adjusts the default OpenMP
target arguments to set the thread limit to one (effectively num_threads(1))
when the target region contains no parallel regions.
There may still be multiple teams in this case.
As a result, libgomp will not attempt to launch GPU threads that would never be used.
OK to commit?
Andrew
OpenMP: Disable GPU threads when only teams are used
gcc/
* omp-expand.c (contains_threads): New.
(get_target_arguments): Add region argument. Set number of threads
to one if region does not contain threads.
(expand_omp_target): Add extra argument in call to
get_target_arguments.
Co-Authored-By: Andrew Stubbs <a...@codesourcery.com>
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 0f07e51f7e8..6afe18d5ee0 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -8461,10 +8461,22 @@ push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
}
}
+static bool
+contains_threads (struct omp_region *region)
+{
+ if (!region)
+ return false;
+ /* True if REGION, any region nested in it (->inner), or any later sibling (->next) is a GIMPLE_OMP_PARALLEL region; a NULL REGION yields false.  NOTE(review): presumably this only sees regions recorded in this region tree, so parallel constructs reached through function calls would not be found -- confirm.  */
+ return region->type == GIMPLE_OMP_PARALLEL
+ || contains_threads (region->inner)
+ || contains_threads (region->next);
+}
+
/* Create an array of arguments that is then passed to GOMP_target. */
static tree
-get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
+get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt,
+ struct omp_region *region)
{
auto_vec <tree, 6> args;
tree clauses = gimple_omp_target_clauses (tgt_stmt);
@@ -8481,6 +8493,11 @@ get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
else
t = integer_minus_one_node;
+
+ if (tree_int_cst_equal (t, integer_zero_node)
+ && !contains_threads (region->inner))
+ t = integer_one_node;
+
push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
GOMP_TARGET_ARG_THREAD_LIMIT, t,
&args);
@@ -8994,7 +9011,7 @@ expand_omp_target (struct omp_region *region)
depend = build_int_cst (ptr_type_node, 0);
args.quick_push (depend);
if (start_ix == BUILT_IN_GOMP_TARGET)
- args.quick_push (get_target_arguments (&gsi, entry_stmt));
+ args.quick_push (get_target_arguments (&gsi, entry_stmt, region));
break;
case BUILT_IN_GOACC_PARALLEL:
if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)