https://gcc.gnu.org/g:2432a022ad84aabd4e0d3725a1fe148599096926

commit 2432a022ad84aabd4e0d3725a1fe148599096926
Author: Thomas Schwinge <tschwi...@baylibre.com>
Date:   Mon Jul 15 11:19:28 2024 +0200

    GCN: Honor OpenMP 5.1 'num_teams' lower bound
    
    Corresponding to commit 9fa72756d90e0d9edadf6e6f5f56476029925788
    "libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound", these are the
    GCN offloading changes to fix:
    
        PASS: libgomp.c/../libgomp.c-c++-common/teams-2.c (test for excess errors)
        [-FAIL:-]{+PASS:+} libgomp.c/../libgomp.c-c++-common/teams-2.c execution test
    
        PASS: libgomp.c++/../libgomp.c-c++-common/teams-2.c (test for excess errors)
        [-FAIL:-]{+PASS:+} libgomp.c++/../libgomp.c-c++-common/teams-2.c execution test
    
    ..., and omptests' 't-critical' test case.  I've cross checked that those test
    cases are the ones that regress for nvptx offloading, if I locally revert the
    "libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound" changes.
    
            libgomp/
            * config/gcn/libgomp-gcn.h (GOMP_TEAM_NUM): Inject.
            * config/gcn/target.c (GOMP_teams4): Handle.
            * config/gcn/team.c (gomp_gcn_enter_kernel): Initialize.
            * config/gcn/teams.c (omp_get_team_num): Adjust.
    
    (cherry picked from commit f9119948cedefa07a667e8beacbd5317a4d8ec1b)

Diff:
---
 libgomp/ChangeLog.omp            | 10 ++++++++++
 libgomp/config/gcn/libgomp-gcn.h |  9 +++++----
 libgomp/config/gcn/target.c      | 29 ++++++++++++++++++++---------
 libgomp/config/gcn/team.c        |  3 +++
 libgomp/config/gcn/teams.c       |  5 +++--
 5 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 614c63346667..37be690a7b7a 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,13 @@
+2024-07-19  Thomas Schwinge  <tschwi...@baylibre.com>
+
+       Backported from trunk:
+       2024-07-19  Thomas Schwinge  <tschwi...@baylibre.com>
+
+       * config/gcn/libgomp-gcn.h (GOMP_TEAM_NUM): Inject.
+       * config/gcn/target.c (GOMP_teams4): Handle.
+       * config/gcn/team.c (gomp_gcn_enter_kernel): Initialize.
+       * config/gcn/teams.c (omp_get_team_num): Adjust.
+
 2024-04-16  Andrew Pinski  <quic_apin...@quicinc.com>
 
        * gfortran.dg/gomp/atomic-21.f90: Update testcase for the removal of `;`.
diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h
index e94f0c7ae689..48a3741b04dd 100644
--- a/libgomp/config/gcn/libgomp-gcn.h
+++ b/libgomp/config/gcn/libgomp-gcn.h
@@ -34,10 +34,11 @@
 #define DEFAULT_TEAM_ARENA_SIZE (64*1024)
 
 /* These define the LDS location of data needed by OpenMP.  */
-#define TEAM_ARENA_START 16  /* LDS offset of free pointer.  */
-#define TEAM_ARENA_FREE  24  /* LDS offset of free pointer.  */
-#define TEAM_ARENA_END   32  /* LDS offset of end pointer.  */
-#define GCN_LOWLAT_HEAP  40  /* LDS offset of the OpenMP low-latency heap.  */
+#define GOMP_TEAM_NUM    16
+#define TEAM_ARENA_START 24  /* LDS offset of free pointer.  */
+#define TEAM_ARENA_FREE  32  /* LDS offset of free pointer.  */
+#define TEAM_ARENA_END   40  /* LDS offset of end pointer.  */
+#define GCN_LOWLAT_HEAP  48  /* LDS offset of the OpenMP low-latency heap.  */
 
 struct heap
 {
diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c
index cc043c7b23ac..2d8bde625d81 100644
--- a/libgomp/config/gcn/target.c
+++ b/libgomp/config/gcn/target.c
@@ -33,26 +33,37 @@ bool
 GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
             unsigned int thread_limit, bool first)
 {
+  int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
+  unsigned int num_workgroups = __builtin_gcn_dim_size (0);
   if (!first)
-    return false;
+    {
+      unsigned int team_num;
+      if (num_workgroups > gomp_num_teams_var)
+       return false;
+      team_num = *gomp_team_num;
+      if (team_num > gomp_num_teams_var - num_workgroups)
+       return false;
+      *gomp_team_num = team_num + num_workgroups;
+      return true;
+    }
   if (thread_limit)
     {
       struct gomp_task_icv *icv = gomp_icv (true);
       icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
     }
-  unsigned int num_workgroups, workgroup_id;
-  num_workgroups = __builtin_gcn_dim_size (0);
-  workgroup_id = __builtin_gcn_dim_pos (0);
-  /* FIXME: If num_teams_lower > num_workgroups, we want to loop
-     multiple times at least for some workgroups.  */
-  (void) num_teams_lower;
-  if (!num_teams_upper || num_teams_upper >= num_workgroups)
+  if (!num_teams_upper)
     num_teams_upper = ((GOMP_ADDITIONAL_ICVS.nteams > 0
                        && num_workgroups > GOMP_ADDITIONAL_ICVS.nteams)
                       ? GOMP_ADDITIONAL_ICVS.nteams : num_workgroups);
-  else if (workgroup_id >= num_teams_upper)
+  else if (num_workgroups < num_teams_lower)
+    num_teams_upper = num_teams_lower;
+  else if (num_workgroups < num_teams_upper)
+    num_teams_upper = num_workgroups;
+  unsigned int workgroup_id = __builtin_gcn_dim_pos (0);
+  if (workgroup_id >= num_teams_upper)
     return false;
+  *gomp_team_num = workgroup_id;
   gomp_num_teams_var = num_teams_upper - 1;
   return true;
 }
diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c
index 47bf2df55ac8..da462f843164 100644
--- a/libgomp/config/gcn/team.c
+++ b/libgomp/config/gcn/team.c
@@ -68,6 +68,9 @@ gomp_gcn_enter_kernel (void)
       /* Starting additional threads is not supported.  */
       gomp_global_icv.dyn_var = true;
 
+      int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
+      *gomp_team_num = 0;
+
       /* Initialize the team arena for optimized memory allocation.
          The arena has been allocated on the host side, and the address
          passed in via the kernargs.  Each team takes a small slice of it.  */
diff --git a/libgomp/config/gcn/teams.c b/libgomp/config/gcn/teams.c
index 8a91ba8f5c1d..57404184c89c 100644
--- a/libgomp/config/gcn/teams.c
+++ b/libgomp/config/gcn/teams.c
@@ -44,10 +44,11 @@ omp_get_num_teams (void)
   return gomp_num_teams_var + 1;
 }
 
-int __attribute__ ((__optimize__ ("O2")))
+int
 omp_get_team_num (void)
 {
-  return __builtin_gcn_dim_pos (0);
+  int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
+  return *gomp_team_num;
 }
 
 ialias (omp_get_num_teams)

Reply via email to