Hi,

The OpenACC execution model states that implementing a critical
section across workers using atomic operations and a busy-wait loop may never
succeed, since the scheduler may suspend the worker that owns the lock, in
which case the worker waiting on the lock can never complete.

Add a test-case that implements the next best thing: a spinlock using a
busy-wait loop that gives up after a certain number of tries.

This ensures termination, and makes the test-case a valid one, while still
excercising atomic exchange and atomic store.

OK for trunk?

Thanks,
- Tom

[libgomp, openacc] Add terminating spinlock test-cases

libgomp/ChangeLog:

2022-02-02  Tom de Vries  <tdevr...@suse.de>

        * testsuite/libgomp.oacc-c/spin-lock-global.c: New test.
        * testsuite/libgomp.oacc-c/spin-lock-global.h: New test.
        * testsuite/libgomp.oacc-c/spin-lock-shared.c: New test.
        * testsuite/libgomp.oacc-c/spin-lock-shared.h: New test.

---
 .../testsuite/libgomp.oacc-c/spin-lock-global.c    |  43 ++++++
 .../testsuite/libgomp.oacc-c/spin-lock-global.h    | 169 +++++++++++++++++++++
 .../testsuite/libgomp.oacc-c/spin-lock-shared.c    |  35 +++++
 .../testsuite/libgomp.oacc-c/spin-lock-shared.h    | 135 ++++++++++++++++
 4 files changed, 382 insertions(+)

diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.c 
b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.c
new file mode 100644
index 00000000000..0c1da9e842f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
+#include <assert.h>
+
+enum memmodel
+  {
+    MEMMODEL_RELAXED = 0,
+    MEMMODEL_ACQUIRE = 2,
+    MEMMODEL_RELEASE = 3,
+    MEMMODEL_SEQ_CST = 5,
+  };
+
+#define TYPE unsigned int
+#define LOCKVAR1 lock_32_1
+#define LOCKVAR2 lock_32_2
+#define TESTS tests_32
+#include "spin-lock-global.h"
+#undef TYPE
+#undef LOCKVAR1
+#undef LOCKVAR2
+#undef TESTS
+
+#define TYPE unsigned long long int
+#define LOCKVAR1 lock_64_1
+#define LOCKVAR2 lock_64_2
+#define TESTS tests_64
+#include "spin-lock-global.h"
+#undef TYPE
+#undef LOCKVAR1
+#undef LOCKVAR2
+#undef TESTS
+
+#define N (7 * 1000)
+
+int
+main (void)
+{
+  tests_32 (N);
+  tests_64 (N);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.h 
b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.h
new file mode 100644
index 00000000000..ea63fafccb9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.h
@@ -0,0 +1,169 @@
+#define XSTR(S) STR (S)
+#define STR(S) #S
+
+#define PRINTF(...)                            \
+  {                                            \
+    printf (__VA_ARGS__);                      \
+    fflush (NULL);                             \
+  }
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#ifndef SPIN_CNT_MAX
+/* Define to have limited-spin spinlock.
+   Ensures that the program will terminate.  */
+#define SPIN_CNT_MAX 0x8000U
+#endif
+
+#define TEST_1(N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS)                 \
+  assert (N % N_GANGS == 0);                                           \
+                                                                       \
+  DO_PRAGMA (acc parallel                                              \
+            num_gangs(N_GANGS)                                         \
+            num_workers(N_WORKERS)                                     \
+            copy (lock_cnt)                                            \
+            copy (spin_cnt_max_hit)                                    \
+            present (LOCKVAR))                                         \
+  {                                                                    \
+    TYPE unlocked = (TYPE)0;                                           \
+    TYPE locked = ~unlocked;                                           \
+                                                                       \
+    LOCKVAR = unlocked;                                                        
\
+                                                                       \
+    unsigned int n_gangs                                               \
+      = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);                 \
+                                                                       \
+    DO_PRAGMA (acc loop worker)                                                
\
+      for (unsigned int i = 0; i < N / n_gangs; i++)                   \
+       {                                                               \
+         TYPE res;                                                     \
+                                                                       \
+         unsigned int spin_cnt = 0;                                    \
+         while (1)                                                     \
+           {                                                           \
+             res = __atomic_exchange_n (&LOCKVAR, locked,              \
+                                        MEMMODEL_ACQUIRE);             \
+             if (res == locked)                                        \
+               {                                                       \
+                 if (SPIN_CNT_MAX > 0)                                 \
+                   {                                                   \
+                     spin_cnt++;                                       \
+                     if (spin_cnt == SPIN_CNT_MAX)                     \
+                       {                                               \
+                         if (VERIFY)                                   \
+                           __atomic_fetch_add (&spin_cnt_max_hit, 1,   \
+                                               MEMMODEL_RELAXED);      \
+                         break;                                        \
+                       }                                               \
+                   }                                                   \
+                 continue;                                             \
+                                                                       \
+               }                                                       \
+             else                                                      \
+               {                                                       \
+                 if (res != unlocked)                                  \
+                   __builtin_abort ();                                 \
+                                                                       \
+                 if (VERIFY)                                           \
+                   __atomic_fetch_add (&lock_cnt, 1,                   \
+                                       MEMMODEL_RELAXED);              \
+                                                                       \
+                 __atomic_store_n (&LOCKVAR, unlocked,                 \
+                                   MEMMODEL_RELEASE);                  \
+                 break;                                                \
+               }                                                       \
+           }                                                           \
+       }                                                               \
+  }
+
+#define TEST(N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS)                   \
+  {                                                                    \
+    spin_cnt_max_hit = 0;                                              \
+                                                                       \
+    if (VERIFY)                                                                
\
+      lock_cnt = 0;                                                    \
+                                                                       \
+    PRINTF ("%s - verify=%u - lock=%s - gangs=%u - workers=%u ... ",   \
+           XSTR (TYPE), VERIFY, STR(LOCKVAR), N_GANGS, N_WORKERS);     \
+    TEST_1 (N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS);                   \
+    PRINTF ("done\n");                                                 \
+                                                                       \
+    if (VERIFY && SPIN_CNT_MAX)                                                
\
+      PRINTF ("spin_cnt_max_hit: %llu\n", spin_cnt_max_hit);           \
+                                                                       \
+    if (VERIFY && (lock_cnt + spin_cnt_max_hit != N))                  \
+      {                                                                        
\
+       PRINTF ("lock_cnt: %llu\n", lock_cnt);                          \
+       PRINTF ("lock_cnt + spin_cnt_max_hit: %llu\n",                  \
+               lock_cnt + spin_cnt_max_hit);                           \
+       PRINTF ("N: %u\n", N);                                          \
+       __builtin_abort ();                                             \
+      }                                                                        
\
+  }
+
+/* Uses .global addressing on nvptx.  */
+TYPE LOCKVAR1;
+#pragma acc declare create (LOCKVAR1)
+
+void
+TESTS (unsigned int n)
+{
+  unsigned long long int lock_cnt;
+  unsigned long long int spin_cnt_max_hit;
+
+  /* Uses generic addressing on nvptx.  */
+  TYPE LOCKVAR2;
+#pragma acc declare create (LOCKVAR2)
+
+#define N_GANGS 1
+#define N_WORKERS 8
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 2
+#define N_WORKERS 4
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 4
+#define N_WORKERS 2
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 8
+#define N_WORKERS 1
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.c 
b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.c
new file mode 100644
index 00000000000..81d18fcc798
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
+#include <assert.h>
+
+enum memmodel
+  {
+    MEMMODEL_RELAXED = 0,
+    MEMMODEL_ACQUIRE = 2,
+    MEMMODEL_RELEASE = 3,
+    MEMMODEL_SEQ_CST = 5,
+  };
+
+#define TYPE unsigned int
+#define TESTS tests_32
+#include "spin-lock-shared.h"
+#undef TYPE
+#undef TESTS
+
+#define TYPE unsigned long long int
+#define TESTS tests_64
+#include "spin-lock-shared.h"
+#undef TYPE
+#undef TESTS
+
+#define N (50 * 1000)
+
+int
+main (void)
+{
+  tests_32 (N);
+  tests_64 (N);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.h 
b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.h
new file mode 100644
index 00000000000..923f38c60fe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.h
@@ -0,0 +1,135 @@
+#define XSTR(S) STR (S)
+#define STR(S) #S
+
+#define PRINTF(...)                            \
+  {                                            \
+    printf (__VA_ARGS__);                      \
+    fflush (NULL);                             \
+  }
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#ifndef SPIN_CNT_MAX
+/* Define to have limited-spin spinlock.
+   Ensures that the program will terminate.  */
+#define SPIN_CNT_MAX 0x20000U
+#endif
+
+#define TEST_1(N, LOCKREF)                                             \
+  DO_PRAGMA (acc parallel                                              \
+            num_gangs(1)                                               \
+            num_workers(N_WORKERS)                                     \
+            copy (lock_cnt)                                            \
+            copy (spin_cnt_max_hit))                                   \
+  {                                                                    \
+    TYPE unlocked = (TYPE)0;                                           \
+    TYPE locked = ~unlocked;                                           \
+    TYPE lock;                                                         \
+    TYPE *volatile lock_ptr = &lock;                                   \
+    unsigned long long int lock_cnt_1;                                 \
+    unsigned long long int spin_cnt_max_hit_1;                         \
+                                                                       \
+    if (VERIFY)                                                                
\
+      {                                                                        
\
+       lock_cnt_1 = 0;                                                 \
+                                                                       \
+       if (SPIN_CNT_MAX)                                               \
+         spin_cnt_max_hit_1 = 0;                                       \
+      }                                                                        
\
+                                                                       \
+    *(LOCKREF) = unlocked;                                             \
+                                                                       \
+    DO_PRAGMA (acc loop worker)                                                
\
+      for (unsigned int i = 0; i < N; i++)                             \
+       {                                                               \
+         TYPE res;                                                     \
+                                                                       \
+         unsigned int spin_cnt = 0;                                    \
+         while (1)                                                     \
+           {                                                           \
+             res = __atomic_exchange_n (LOCKREF, locked,               \
+                                        MEMMODEL_ACQUIRE);             \
+             if (res == locked)                                        \
+               {                                                       \
+                 if (SPIN_CNT_MAX > 0)                                 \
+                   {                                                   \
+                     spin_cnt++;                                       \
+                     if (spin_cnt == SPIN_CNT_MAX)                     \
+                       {                                               \
+                         if (VERIFY)                                   \
+                           __atomic_fetch_add (&spin_cnt_max_hit_1, 1, \
+                                               MEMMODEL_RELAXED);      \
+                         break;                                        \
+                       }                                               \
+                   }                                                   \
+                 continue;                                             \
+               }                                                       \
+             else                                                      \
+               {                                                       \
+                 if (res != unlocked)                                  \
+                   __builtin_abort ();                                 \
+                                                                       \
+                 if (VERIFY)                                           \
+                   __atomic_fetch_add (&lock_cnt_1, 1,                 \
+                                       MEMMODEL_RELAXED);              \
+                                                                       \
+                 __atomic_store_n (LOCKREF, unlocked,                  \
+                                   MEMMODEL_RELEASE);                  \
+                                                                       \
+                 break;                                                \
+               }                                                       \
+           }                                                           \
+       }                                                               \
+                                                                       \
+    if (VERIFY)                                                                
\
+      {                                                                        
\
+       lock_cnt += lock_cnt_1;                                         \
+                                                                       \
+       if (SPIN_CNT_MAX)                                               \
+         spin_cnt_max_hit += spin_cnt_max_hit_1;                       \
+      }                                                                        
\
+  }
+
+#define TEST(N, LOCKREF)                                       \
+  {                                                            \
+    spin_cnt_max_hit = 0;                                      \
+                                                               \
+    if (VERIFY)                                                        \
+      lock_cnt = 0;                                            \
+                                                               \
+    PRINTF ("%s - verify=%u - LOCKREF=%s ... ",                        \
+           XSTR (TYPE), VERIFY, #LOCKREF);                     \
+    TEST_1 (N, LOCKREF);                                       \
+    PRINTF ("done\n");                                         \
+                                                               \
+    if (VERIFY && SPIN_CNT_MAX)                                        \
+      PRINTF ("spin_cnt_max_hit: %llu\n", spin_cnt_max_hit);   \
+                                                               \
+    if (VERIFY && (lock_cnt + spin_cnt_max_hit != N))          \
+      {                                                                \
+       PRINTF ("lock_cnt: %llu\n", lock_cnt);                  \
+       PRINTF ("lock_cnt + spin_cnt_max_hit: %llu\n",          \
+               lock_cnt + spin_cnt_max_hit);                   \
+       PRINTF ("N: %u\n", N);                                  \
+       __builtin_abort ();                                     \
+      }                                                                \
+  }
+
+void
+TESTS (unsigned int n)
+{
+  unsigned long long int lock_cnt;
+  unsigned long long int spin_cnt_max_hit;
+
+#define N_WORKERS 8
+
+#define VERIFY 0
+  TEST (n, &lock);
+  TEST (n, lock_ptr);
+#undef VERIFY
+
+#define VERIFY 1
+  TEST (n, &lock);
+  TEST (n, lock_ptr);
+#undef VERIFY
+}

Reply via email to