https://gcc.gnu.org/g:2432a022ad84aabd4e0d3725a1fe148599096926
commit 2432a022ad84aabd4e0d3725a1fe148599096926 Author: Thomas Schwinge <tschwi...@baylibre.com> Date: Mon Jul 15 11:19:28 2024 +0200 GCN: Honor OpenMP 5.1 'num_teams' lower bound Corresponding to commit 9fa72756d90e0d9edadf6e6f5f56476029925788 "libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound", these are the GCN offloading changes to fix: PASS: libgomp.c/../libgomp.c-c++-common/teams-2.c (test for excess errors) [-FAIL:-]{+PASS:+} libgomp.c/../libgomp.c-c++-common/teams-2.c execution test PASS: libgomp.c++/../libgomp.c-c++-common/teams-2.c (test for excess errors) [-FAIL:-]{+PASS:+} libgomp.c++/../libgomp.c-c++-common/teams-2.c execution test ..., and omptests' 't-critical' test case. I've cross checked that those test cases are the ones that regress for nvptx offloading, if I locally revert the "libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound" changes. libgomp/ * config/gcn/libgomp-gcn.h (GOMP_TEAM_NUM): Inject. * config/gcn/target.c (GOMP_teams4): Handle. * config/gcn/team.c (gomp_gcn_enter_kernel): Initialize. * config/gcn/teams.c (omp_get_team_num): Adjust. (cherry picked from commit f9119948cedefa07a667e8beacbd5317a4d8ec1b) Diff: --- libgomp/ChangeLog.omp | 10 ++++++++++ libgomp/config/gcn/libgomp-gcn.h | 9 +++++---- libgomp/config/gcn/target.c | 29 ++++++++++++++++++++--------- libgomp/config/gcn/team.c | 3 +++ libgomp/config/gcn/teams.c | 5 +++-- 5 files changed, 41 insertions(+), 15 deletions(-) diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 614c63346667..37be690a7b7a 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,13 @@ +2024-07-19 Thomas Schwinge <tschwi...@baylibre.com> + + Backported from trunk: + 2024-07-19 Thomas Schwinge <tschwi...@baylibre.com> + + * config/gcn/libgomp-gcn.h (GOMP_TEAM_NUM): Inject. + * config/gcn/target.c (GOMP_teams4): Handle. + * config/gcn/team.c (gomp_gcn_enter_kernel): Initialize. + * config/gcn/teams.c (omp_get_team_num): Adjust. + 2024-04-16 Andrew Pinski <quic_apin...@quicinc.com> * gfortran.dg/gomp/atomic-21.f90: Update testcase for the removal of `;`. diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h index e94f0c7ae689..48a3741b04dd 100644 --- a/libgomp/config/gcn/libgomp-gcn.h +++ b/libgomp/config/gcn/libgomp-gcn.h @@ -34,10 +34,11 @@ #define DEFAULT_TEAM_ARENA_SIZE (64*1024) /* These define the LDS location of data needed by OpenMP. */ -#define TEAM_ARENA_START 16 /* LDS offset of free pointer. */ -#define TEAM_ARENA_FREE 24 /* LDS offset of free pointer. */ -#define TEAM_ARENA_END 32 /* LDS offset of end pointer. */ -#define GCN_LOWLAT_HEAP 40 /* LDS offset of the OpenMP low-latency heap. */ +#define GOMP_TEAM_NUM 16 +#define TEAM_ARENA_START 24 /* LDS offset of free pointer. */ +#define TEAM_ARENA_FREE 32 /* LDS offset of free pointer. */ +#define TEAM_ARENA_END 40 /* LDS offset of end pointer. */ +#define GCN_LOWLAT_HEAP 48 /* LDS offset of the OpenMP low-latency heap. */ struct heap { diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c index cc043c7b23ac..2d8bde625d81 100644 --- a/libgomp/config/gcn/target.c +++ b/libgomp/config/gcn/target.c @@ -33,26 +33,37 @@ bool GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, unsigned int thread_limit, bool first) { + int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM; + unsigned int num_workgroups = __builtin_gcn_dim_size (0); if (!first) - return false; + { + unsigned int team_num; + if (num_workgroups > gomp_num_teams_var) + return false; + team_num = *gomp_team_num; + if (team_num > gomp_num_teams_var - num_workgroups) + return false; + *gomp_team_num = team_num + num_workgroups; + return true; + } if (thread_limit) { struct gomp_task_icv *icv = gomp_icv (true); icv->thread_limit_var = thread_limit > INT_MAX ? UINT_MAX : thread_limit; } - unsigned int num_workgroups, workgroup_id; - num_workgroups = __builtin_gcn_dim_size (0); - workgroup_id = __builtin_gcn_dim_pos (0); - /* FIXME: If num_teams_lower > num_workgroups, we want to loop - multiple times at least for some workgroups. */ - (void) num_teams_lower; - if (!num_teams_upper || num_teams_upper >= num_workgroups) + if (!num_teams_upper) num_teams_upper = ((GOMP_ADDITIONAL_ICVS.nteams > 0 && num_workgroups > GOMP_ADDITIONAL_ICVS.nteams) ? GOMP_ADDITIONAL_ICVS.nteams : num_workgroups); - else if (workgroup_id >= num_teams_upper) + else if (num_workgroups < num_teams_lower) + num_teams_upper = num_teams_lower; + else if (num_workgroups < num_teams_upper) + num_teams_upper = num_workgroups; + unsigned int workgroup_id = __builtin_gcn_dim_pos (0); + if (workgroup_id >= num_teams_upper) return false; + *gomp_team_num = workgroup_id; gomp_num_teams_var = num_teams_upper - 1; return true; } diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c index 47bf2df55ac8..da462f843164 100644 --- a/libgomp/config/gcn/team.c +++ b/libgomp/config/gcn/team.c @@ -68,6 +68,9 @@ gomp_gcn_enter_kernel (void) /* Starting additional threads is not supported. */ gomp_global_icv.dyn_var = true; + int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM; + *gomp_team_num = 0; + /* Initialize the team arena for optimized memory allocation. The arena has been allocated on the host side, and the address passed in via the kernargs. Each team takes a small slice of it. */ diff --git a/libgomp/config/gcn/teams.c b/libgomp/config/gcn/teams.c index 8a91ba8f5c1d..57404184c89c 100644 --- a/libgomp/config/gcn/teams.c +++ b/libgomp/config/gcn/teams.c @@ -44,10 +44,11 @@ omp_get_num_teams (void) return gomp_num_teams_var + 1; } -int __attribute__ ((__optimize__ ("O2"))) +int omp_get_team_num (void) { - return __builtin_gcn_dim_pos (0); + int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM; + return *gomp_team_num; } ialias (omp_get_num_teams)