Ping.
Hi,
I’ve recently started working on libgomp, with the goal of proposing
a new way of handling task queues based on the work done by a
PhD student.
While working on libgomp’s code I noticed two things that puzzled me:
- The code uses GCC’s atomic builtins but doesn’t use the
__ATOMIC_* constants to select the desired memory model for each
operation. Instead it uses the MEMMODEL_* constants defined in
libgomp.h. Is there a good reason for that? I’d be tempted to
“fix” it with the attached patch (see the first sketch after this
list).
- When using the OMP_STACKSIZE environment variable to set the
stack size of the threads, it only applies to the threads created
by pthread_create and has no effect on the main thread of the process.
This behaviour looks compliant with version 3.1 of the spec:
“OMP_STACKSIZE sets the stacksize-var ICV that specifies the size of
the stack for threads created by the OpenMP implementation”
but I was wondering whether it is the best thing to do. I discovered
this while playing with the UTS benchmark from the BOTS benchmark
suite, which can require quite large stacks for some input datasets.
It uses OMP_STACKSIZE to express its requirements, but that doesn’t
prevent a task with stack requirements bigger than the default size
from being scheduled on the main thread of the process, leading the
application to crash because of a stack overflow (see the second
sketch below).
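To illustrate the first point, here is a minimal sketch (hypothetical
code, not part of the patch). GCC predefines the __ATOMIC_* macros
whenever the __atomic_* builtins are available, and the MEMMODEL_*
enum in libgomp.h only works because its values were chosen to match
them (MEMMODEL_RELAXED == __ATOMIC_RELAXED == 0, and so on up to
MEMMODEL_SEQ_CST == __ATOMIC_SEQ_CST == 5):

  /* Hypothetical example, not taken from libgomp.  */
  static int counter;

  int
  bump (void)
  {
    /* Current libgomp style, relying on the enum from libgomp.h:
         return __atomic_fetch_add (&counter, 1, MEMMODEL_RELAXED);
       With the patch, using the compiler's predefined macros:  */
    return __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);
  }

Both spellings compile to exactly the same code; the patch only
removes the duplicated definitions.
Coming back to the OMP_STACKSIZE issue: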
Should the application itself be setting the size of its main thread’s
stack? Shouldn’t something be done in the compiler/runtime to handle
this? That wouldn’t be non-compliant with the spec and would be much
more intuitive for the programmer: “I can expect that every thread will
have OMP_STACKSIZE worth of stack”.
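To make the failure mode concrete, here is a contrived sketch (not
taken from UTS, and the sizes are illustrative) of a task that runs
fine on a worker thread sized by OMP_STACKSIZE but can overflow the
main thread’s default stack (typically 8MB on Linux) when it happens
to be scheduled there:

  /* Contrived example.  Build with -fopenmp and run with
     OMP_STACKSIZE=64M: the task is safe on the workers but may
     still crash if it runs on the initial thread.  */
  #include <string.h>

  static void
  deep_frame (void)
  {
    char buf[16 * 1024 * 1024];   /* bigger than the usual 8MB default */
    memset (buf, 0, sizeof buf);  /* make sure the pages are touched */
  }

  int
  main (void)
  {
  #pragma omp parallel
  #pragma omp single
    {
  #pragma omp task
      deep_frame ();
    }
    return 0;
  }

Until the runtime handles this, the best workaround I can see is for
the application to raise its own stack limit at the very beginning of
main. A Linux-specific sketch (this assumes the kernel honours a
raised RLIMIT_STACK soft limit for further growth of the main stack,
which is my understanding but not something the spec guarantees):

  /* Illustrative workaround in the application, not a libgomp
     change.  Call it first thing in main.  */
  #include <sys/resource.h>

  static void
  raise_stack_limit (rlim_t want)
  {
    struct rlimit rl;

    if (getrlimit (RLIMIT_STACK, &rl) != 0)
      return;
    if (rl.rlim_cur != RLIM_INFINITY && rl.rlim_cur < want)
      {
        rl.rlim_cur = (rl.rlim_max == RLIM_INFINITY || want < rl.rlim_max)
                      ? want : rl.rlim_max;
        setrlimit (RLIMIT_STACK, &rl);
      }
  }

This is exactly the kind of boilerplate the runtime could take care of
itself, based on stacksize-var.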
I should hopefully write again soon with some more useful patches and
proposals. In the meantime, thank you for your answers.
Regards,
Kévin
From b1a6f296d6c4304b41f735859e226213881f861e Mon Sep 17 00:00:00 2001
From: Kevin PETIT <[email protected]>
Date: Mon, 20 May 2013 13:28:40 +0100
Subject: [PATCH] Don't redefine an equivalent to the __ATOMIC_* constants
If we can use the __atomic_* builtins, it means that we also have
the __ATOMIC_* constants available. This patch removes the
definition of the MEMMODEL_* constants in libgomp.h and changes
the code to use __ATOMIC_*.
---
 libgomp/config/linux/affinity.c |  2 +-
 libgomp/config/linux/bar.c      | 10 +++++-----
 libgomp/config/linux/bar.h      |  8 ++++----
 libgomp/config/linux/lock.c     | 10 +++++-----
 libgomp/config/linux/mutex.c    |  6 +++---
 libgomp/config/linux/mutex.h    |  4 ++--
 libgomp/config/linux/ptrlock.c  |  6 +++---
 libgomp/config/linux/ptrlock.h  |  6 +++---
 libgomp/config/linux/sem.c      |  6 +++---
 libgomp/config/linux/sem.h      |  4 ++--
 libgomp/config/linux/wait.h     |  2 +-
 libgomp/critical.c              |  2 +-
 libgomp/libgomp.h               | 11 -----------
 libgomp/ordered.c               |  4 ++--
 libgomp/task.c                  |  4 ++--
 15 files changed, 37 insertions(+), 48 deletions(-)
diff --git a/libgomp/config/linux/affinity.c b/libgomp/config/linux/affinity.c
index dc6c7e5..fcbc323 100644
--- a/libgomp/config/linux/affinity.c
+++ b/libgomp/config/linux/affinity.c
@@ -108,7 +108,7 @@ gomp_init_thread_affinity (pthread_attr_t *attr)
unsigned int cpu;
cpu_set_t cpuset;
- cpu = __atomic_fetch_add (&affinity_counter, 1, MEMMODEL_RELAXED);
+ cpu = __atomic_fetch_add (&affinity_counter, 1, __ATOMIC_RELAXED);
cpu %= gomp_cpu_affinity_len;
CPU_ZERO (&cpuset);
CPU_SET (gomp_cpu_affinity[cpu], &cpuset);
diff --git a/libgomp/config/linux/bar.c b/libgomp/config/linux/bar.c
index 35baa88..8a81f7c 100644
--- a/libgomp/config/linux/bar.c
+++ b/libgomp/config/linux/bar.c
@@ -38,14 +38,14 @@ gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
/* Next time we'll be awaiting TOTAL threads again. */
bar->awaited = bar->total;
__atomic_store_n (&bar->generation, bar->generation + 4,
- MEMMODEL_RELEASE);
+ __ATOMIC_RELEASE);
futex_wake ((int *) &bar->generation, INT_MAX);
}
else
{
do
do_wait ((int *) &bar->generation, state);
- while (__atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) == state);
+ while (__atomic_load_n (&bar->generation, __ATOMIC_ACQUIRE) == state);
}
}
@@ -95,7 +95,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
}
else
{
- __atomic_store_n (&bar->generation, state + 3, MEMMODEL_RELEASE);
+ __atomic_store_n (&bar->generation, state + 3, __ATOMIC_RELEASE);
futex_wake ((int *) &bar->generation, INT_MAX);
return;
}
@@ -105,11 +105,11 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
do
{
do_wait ((int *) &bar->generation, generation);
- gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ gen = __atomic_load_n (&bar->generation, __ATOMIC_ACQUIRE);
if (__builtin_expect (gen & 1, 0))
{
gomp_barrier_handle_tasks (state);
- gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ gen = __atomic_load_n (&bar->generation, __ATOMIC_ACQUIRE);
}
if ((gen & 2) != 0)
generation |= 2;
diff --git a/libgomp/config/linux/bar.h b/libgomp/config/linux/bar.h
index 69b9706..e985449 100644
--- a/libgomp/config/linux/bar.h
+++ b/libgomp/config/linux/bar.h
@@ -50,7 +50,7 @@ static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
{
- __atomic_add_fetch (&bar->awaited, count - bar->total, MEMMODEL_ACQ_REL);
+ __atomic_add_fetch (&bar->awaited, count - bar->total, __ATOMIC_ACQ_REL);
bar->total = count;
}
@@ -69,13 +69,13 @@ extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
static inline gomp_barrier_state_t
gomp_barrier_wait_start (gomp_barrier_t *bar)
{
- unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) & ~3;
+ unsigned int ret = __atomic_load_n (&bar->generation, __ATOMIC_ACQUIRE) & ~3;
/* A memory barrier is needed before exiting from the various forms
of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
2.8.6 flush Construct, which says there is an implicit flush during
a barrier region. This is a convenient place to add the barrier,
- so we use MEMMODEL_ACQ_REL here rather than MEMMODEL_ACQUIRE. */
- ret += __atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0;
+ so we use __ATOMIC_ACQ_REL here rather than __ATOMIC_ACQUIRE. */
+ ret += __atomic_add_fetch (&bar->awaited, -1, __ATOMIC_ACQ_REL) == 0;
return ret;
}
diff --git a/libgomp/config/linux/lock.c b/libgomp/config/linux/lock.c
index 9cdde5c..b81bf1c 100644
--- a/libgomp/config/linux/lock.c
+++ b/libgomp/config/linux/lock.c
@@ -65,7 +65,7 @@ gomp_test_lock_30 (omp_lock_t *lock)
int oldval = 0;
return __atomic_compare_exchange_n (lock, &oldval, 1, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
void
@@ -114,7 +114,7 @@ gomp_test_nest_lock_30 (omp_nest_lock_t *lock)
oldval = 0;
if (__atomic_compare_exchange_n (&lock->lock, &oldval, 1, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
{
lock->owner = me;
lock->count = 1;
@@ -192,7 +192,7 @@ gomp_set_nest_lock_25 (omp_nest_lock_25_t *lock)
{
otid = 0;
if (__atomic_compare_exchange_n (&lock->owner, &otid, tid, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
{
lock->count = 1;
return;
@@ -214,7 +214,7 @@ gomp_unset_nest_lock_25 (omp_nest_lock_25_t *lock)
if (--lock->count == 0)
{
- __atomic_store_n (&lock->owner, 0, MEMMODEL_RELEASE);
+ __atomic_store_n (&lock->owner, 0, __ATOMIC_RELEASE);
futex_wake (&lock->owner, 1);
}
}
@@ -226,7 +226,7 @@ gomp_test_nest_lock_25 (omp_nest_lock_25_t *lock)
otid = 0;
if (__atomic_compare_exchange_n (&lock->owner, &otid, tid, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
{
lock->count = 1;
return 1;
diff --git a/libgomp/config/linux/mutex.c b/libgomp/config/linux/mutex.c
index 0e3872c..90e749d 100644
--- a/libgomp/config/linux/mutex.c
+++ b/libgomp/config/linux/mutex.c
@@ -40,7 +40,7 @@ gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval)
if (do_spin (mutex, 1))
{
/* Spin timeout, nothing changed. Set waiting flag. */
- oldval = __atomic_exchange_n (mutex, -1, MEMMODEL_ACQUIRE);
+ oldval = __atomic_exchange_n (mutex, -1, __ATOMIC_ACQUIRE);
if (oldval == 0)
return;
futex_wait (mutex, -1);
@@ -51,14 +51,14 @@ gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval)
/* Something changed. If now unlocked, we're good to go. */
oldval = 0;
if (__atomic_compare_exchange_n (mutex, &oldval, 1, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
return;
}
}
/* Second loop waits until mutex is unlocked. We always exit this
loop with wait flag set, so next unlock will awaken a thread. */
- while ((oldval = __atomic_exchange_n (mutex, -1, MEMMODEL_ACQUIRE)))
+ while ((oldval = __atomic_exchange_n (mutex, -1, __ATOMIC_ACQUIRE)))
do_wait (mutex, -1);
}
diff --git a/libgomp/config/linux/mutex.h b/libgomp/config/linux/mutex.h
index e789ed0..b22078c 100644
--- a/libgomp/config/linux/mutex.h
+++ b/libgomp/config/linux/mutex.h
@@ -52,14 +52,14 @@ gomp_mutex_lock (gomp_mutex_t *mutex)
{
int oldval = 0;
if (!__atomic_compare_exchange_n (mutex, &oldval, 1, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
gomp_mutex_lock_slow (mutex, oldval);
}
static inline void
gomp_mutex_unlock (gomp_mutex_t *mutex)
{
- int wait = __atomic_exchange_n (mutex, 0, MEMMODEL_RELEASE);
+ int wait = __atomic_exchange_n (mutex, 0, __ATOMIC_RELEASE);
if (__builtin_expect (wait < 0, 0))
gomp_mutex_unlock_slow (mutex);
}
diff --git a/libgomp/config/linux/ptrlock.c b/libgomp/config/linux/ptrlock.c
index fa51111..7f1b17e 100644
--- a/libgomp/config/linux/ptrlock.c
+++ b/libgomp/config/linux/ptrlock.c
@@ -37,7 +37,7 @@ gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
uintptr_t oldval = 1;
__atomic_compare_exchange_n (ptrlock, &oldval, 2, false,
- MEMMODEL_RELAXED, MEMMODEL_RELAXED);
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
/* futex works on ints, not pointers.
But a valid work share pointer will be at least
@@ -50,9 +50,9 @@ gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
#endif
do
do_wait (intptr, 2);
- while (__atomic_load_n (intptr, MEMMODEL_RELAXED) == 2);
+ while (__atomic_load_n (intptr, __ATOMIC_RELAXED) == 2);
__asm volatile ("" : : : "memory");
- return (void *) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
+ return (void *) __atomic_load_n (ptrlock, __ATOMIC_ACQUIRE);
}
void
diff --git a/libgomp/config/linux/ptrlock.h b/libgomp/config/linux/ptrlock.h
index 8de1101..47d79d5 100644
--- a/libgomp/config/linux/ptrlock.h
+++ b/libgomp/config/linux/ptrlock.h
@@ -48,13 +48,13 @@ static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
{
uintptr_t oldval;
- uintptr_t v = (uintptr_t) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
+ uintptr_t v = (uintptr_t) __atomic_load_n (ptrlock, __ATOMIC_ACQUIRE);
if (v > 2)
return (void *) v;
oldval = 0;
if (__atomic_compare_exchange_n (ptrlock, &oldval, 1, false,
- MEMMODEL_ACQUIRE, MEMMODEL_ACQUIRE))
+ __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE))
return NULL;
return gomp_ptrlock_get_slow (ptrlock);
@@ -63,7 +63,7 @@ static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
extern void gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock);
static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
{
- void *wait = __atomic_exchange_n (ptrlock, ptr, MEMMODEL_RELEASE);
+ void *wait = __atomic_exchange_n (ptrlock, ptr, __ATOMIC_RELEASE);
if ((uintptr_t) wait != 1)
gomp_ptrlock_set_slow (ptrlock);
}
diff --git a/libgomp/config/linux/sem.c b/libgomp/config/linux/sem.c
index b25005a..978607f 100644
--- a/libgomp/config/linux/sem.c
+++ b/libgomp/config/linux/sem.c
@@ -36,7 +36,7 @@ gomp_sem_wait_slow (gomp_sem_t *sem, int count)
if (do_spin (sem, 0)
/* Spin timeout, nothing changed. Set waiting flag. */
&& __atomic_compare_exchange_n (sem, &count, SEM_WAIT, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
{
futex_wait (sem, SEM_WAIT);
count = *sem;
@@ -47,7 +47,7 @@ gomp_sem_wait_slow (gomp_sem_t *sem, int count)
1))
{
if (__atomic_compare_exchange_n (sem, &count, count - SEM_INC, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
return;
}
@@ -61,7 +61,7 @@ gomp_sem_wait_slow (gomp_sem_t *sem, int count)
if (wake != 0)
newval |= wake - SEM_INC;
if (__atomic_compare_exchange_n (sem, &count, newval, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
{
if (wake != 0)
{
diff --git a/libgomp/config/linux/sem.h b/libgomp/config/linux/sem.h
index 95e1442..e6d315d 100644
--- a/libgomp/config/linux/sem.h
+++ b/libgomp/config/linux/sem.h
@@ -59,7 +59,7 @@ gomp_sem_wait (gomp_sem_t *sem)
while ((count & ~SEM_WAIT) != 0)
if (__atomic_compare_exchange_n (sem, &count, count - SEM_INC, true,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
return;
gomp_sem_wait_slow (sem, count);
}
@@ -78,7 +78,7 @@ gomp_sem_post (gomp_sem_t *sem)
before it had time to set SEM_WAIT. */
while (!__atomic_compare_exchange_n (sem, &count,
(count + SEM_INC) & ~SEM_WAIT, true,
- MEMMODEL_RELEASE, MEMMODEL_RELAXED))
+ __ATOMIC_RELEASE, __ATOMIC_RELAXED))
continue;
if (__builtin_expect (count & SEM_WAIT, 0))
diff --git a/libgomp/config/linux/wait.h b/libgomp/config/linux/wait.h
index e60f527..55f7b64 100644
--- a/libgomp/config/linux/wait.h
+++ b/libgomp/config/linux/wait.h
@@ -51,7 +51,7 @@ static inline int do_spin (int *addr, int val)
if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0))
count = gomp_throttled_spin_count_var;
for (i = 0; i < count; i++)
- if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val, 0))
+ if (__builtin_expect (__atomic_load_n (addr, __ATOMIC_RELAXED) != val, 0))
return 0;
else
cpu_relax ();
diff --git a/libgomp/critical.c b/libgomp/critical.c
index 084f01b..5087bbd 100644
--- a/libgomp/critical.c
+++ b/libgomp/critical.c
@@ -34,7 +34,7 @@ void
GOMP_critical_start (void)
{
/* There is an implicit flush on entry to a critical region. */
- __atomic_thread_fence (MEMMODEL_RELEASE);
+ __atomic_thread_fence (__ATOMIC_RELEASE);
gomp_mutex_lock (&default_lock);
}
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 322a435..4ce07dd 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -44,17 +44,6 @@
# pragma GCC visibility push(hidden)
#endif
-/* If we were a C++ library, we'd get this from <std/atomic>. */
-enum memmodel
-{
- MEMMODEL_RELAXED = 0,
- MEMMODEL_CONSUME = 1,
- MEMMODEL_ACQUIRE = 2,
- MEMMODEL_RELEASE = 3,
- MEMMODEL_ACQ_REL = 4,
- MEMMODEL_SEQ_CST = 5
-};
-
#include "sem.h"
#include "mutex.h"
#include "bar.h"
diff --git a/libgomp/ordered.c b/libgomp/ordered.c
index 6b1f5c1..fff01be 100644
--- a/libgomp/ordered.c
+++ b/libgomp/ordered.c
@@ -210,10 +210,10 @@ gomp_ordered_sync (void)
Either way we get correct results.
However, there is an implicit flush on entry to an ordered region,
so we do need to have a barrier here. If we were taking a lock
- this could be MEMMODEL_RELEASE since the acquire would be coverd
+ this could be __ATOMIC_RELEASE since the acquire would be coverd
by the lock. */
- __atomic_thread_fence (MEMMODEL_ACQ_REL);
+ __atomic_thread_fence (__ATOMIC_ACQ_REL);
if (ws->ordered_owner != thr->ts.team_id)
{
gomp_sem_wait (team->ordered_release[thr->ts.team_id]);
diff --git a/libgomp/task.c b/libgomp/task.c
index 7de650a..2be4db3 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -272,7 +272,7 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
written by child_task->fn above is flushed
before the NULL is written. */
__atomic_store_n (&parent->children, NULL,
- MEMMODEL_RELEASE);
+ __ATOMIC_RELEASE);
if (parent->in_taskwait)
gomp_sem_post (&parent->taskwait_sem);
}
@@ -312,7 +312,7 @@ GOMP_taskwait (void)
child thread task work function are seen before we exit from
GOMP_taskwait. */
if (task == NULL
- || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL)
+ || __atomic_load_n (&task->children, __ATOMIC_ACQUIRE) == NULL)
return;
gomp_mutex_lock (&team->task_lock);
--
1.7.9.5