On Sun, Apr 03, 2011 at 08:10:25PM +0200, Jakub Jelinek wrote:
> On Sun, Apr 03, 2011 at 07:27:12PM +0900, Sho Nakatani wrote:
> > Then, I'll compare the trees created by gcc and icc, and point out
> > that the implementation of OpenMP Task uses Lazy Task Creation while
> > gcc does not.
>
> Depends on what you mean by lazy task creation, gcc schedules
> tasks lazily if they aren't if (0), some data structure if created
> for them when encountering #pragma omp task directive, but I guess
> any implementation will do something like that.
>
> What your testcase shows is not whether tasks are created lazily or not, but
> how good/poor #pragma omp taskwait implementation is. And, for your testcase
> libgomp/task.c (GOMP_taskwait) definitely could be improved. Currently it
> only
> tries to schedule in children that will be awaited by the current tasks and if
> there are no such children, goes to sleep, waiting for them to complete.
> Scheduling in random unrelated tasks is problematic, because the unrelated
> task might take too long to complete and delay the taskwait for way too long
> (note, gcc doesn't have untied tasks, all tasks are tied once they are
> scheduled
> onto some particular tasks - setcontext/swapcontext is quite fragile thing to
> do).
> But it is true it could very well schedule tasks that are taskwaited by tasks
> taskwaited by current task, and transitively further. Plus, be able to
> temporarily
> awake such a sleeping thread if there are tasks it can transitively taskwait
> for, as if those don't complete, the current taskwait won't return.
Just FYI, I've tried to implement something like that as a quick hack,
but it unfortunately slowed things down, at least on the attached fib testcase
with arguments 40 25. Guess partly the problem is that after a task waiting
in taskwait_sem is awaken it now needs to take task_lock lock to unqueue itself
from the new in_taskwait_list, and partly because the search for grand-grand
children
etc. is more expensive, the FIFO isn't a good data structure for that.
Jakub
--- libgomp/team.c.jj 2011-04-04 18:14:58.000000000 +0200
+++ libgomp/team.c 2011-04-04 20:00:45.000000000 +0200
@@ -166,6 +166,7 @@ gomp_new_team (unsigned nthreads)
gomp_mutex_init (&team->task_lock);
team->task_queue = NULL;
+ team->in_taskwait_list = NULL;
team->task_count = 0;
team->task_running_count = 0;
--- libgomp/libgomp.h.jj 2011-04-04 18:19:46.000000000 +0200
+++ libgomp/libgomp.h 2011-04-04 20:00:45.000000000 +0200
@@ -311,6 +311,7 @@ struct gomp_team
gomp_mutex_t task_lock;
struct gomp_task *task_queue;
+ struct gomp_task *in_taskwait_list;
int task_count;
int task_running_count;
--- libgomp/task.c.jj 2009-04-14 16:33:07.000000000 +0200
+++ libgomp/task.c 2011-04-04 20:02:18.000000000 +0200
@@ -176,6 +176,26 @@ GOMP_task (void (*fn) (void *), void *da
gomp_team_barrier_set_task_pending (&team->barrier);
do_wake = team->task_running_count + !parent->in_tied_task
< team->nthreads;
+ if (!do_wake && team->in_taskwait_list)
+ {
+ struct gomp_task *t = team->in_taskwait_list;
+ do
+ {
+ struct gomp_task *p = parent;
+ int i;
+
+ for (i = 0; i < 10 && p; i++, p = p->parent)
+ if (p == t || p->kind == GOMP_TASK_IMPLICIT)
+ break;
+ if (p == t)
+ {
+ gomp_sem_post (&t->taskwait_sem);
+ break;
+ }
+ t = t->next_queue;
+ }
+ while (t != team->in_taskwait_list);
+ }
gomp_mutex_unlock (&team->task_lock);
if (do_wake)
gomp_team_barrier_wake (&team->barrier, 1);
@@ -301,10 +321,35 @@ GOMP_taskwait (void)
}
return;
}
- if (task->children->kind == GOMP_TASK_WAITING)
+ child_task = task->children;
+ if (child_task->kind != GOMP_TASK_WAITING && team->task_queue)
+ {
+ /* Try harder, look for grandchildren etc. */
+ for (child_task = team->task_queue;;
+ child_task = child_task->next_queue)
+ {
+ if (child_task->kind == GOMP_TASK_WAITING)
+ {
+ struct gomp_task *p = child_task->parent;
+ int i;
+
+ for (i = 0; i < 10 && p; i++, p = p->parent)
+ if (p == task || p->kind == GOMP_TASK_IMPLICIT)
+ break;
+ if (p == task)
+ break;
+ }
+ if (child_task->next_queue == team->task_queue)
+ {
+ child_task = task->children;
+ break;
+ }
+ }
+ }
+ if (child_task->kind == GOMP_TASK_WAITING)
{
- child_task = task->children;
- task->children = child_task->next_child;
+ if (child_task->parent->children == child_task)
+ child_task->parent->children = child_task->next_child;
child_task->prev_queue->next_queue = child_task->next_queue;
child_task->next_queue->prev_queue = child_task->prev_queue;
if (team->task_queue == child_task)
@@ -320,9 +365,25 @@ GOMP_taskwait (void)
gomp_team_barrier_clear_task_pending (&team->barrier);
}
else
- /* All tasks we are waiting for are already running
- in other threads. Wait for them. */
- task->in_taskwait = true;
+ {
+ child_task = NULL;
+ /* All tasks we are waiting for are already running
+ in other threads. Wait for them. */
+ task->in_taskwait = true;
+ if (team->in_taskwait_list)
+ {
+ task->next_queue = team->in_taskwait_list;
+ task->prev_queue = team->in_taskwait_list->prev_queue;
+ task->next_queue->prev_queue = task;
+ task->prev_queue->next_queue = task;
+ }
+ else
+ {
+ task->next_queue = task;
+ task->prev_queue = task;
+ team->in_taskwait_list = task;
+ }
+ }
gomp_mutex_unlock (&team->task_lock);
if (to_free)
{
@@ -337,22 +398,23 @@ GOMP_taskwait (void)
thr->task = task;
}
else
- {
- gomp_sem_wait (&task->taskwait_sem);
- task->in_taskwait = false;
- return;
- }
+ gomp_sem_wait (&task->taskwait_sem);
gomp_mutex_lock (&team->task_lock);
if (child_task)
{
+ struct gomp_task *parent = child_task->parent;
child_task->prev_child->next_child = child_task->next_child;
child_task->next_child->prev_child = child_task->prev_child;
- if (task->children == child_task)
+ if (parent->children == child_task)
{
if (child_task->next_child != child_task)
- task->children = child_task->next_child;
+ parent->children = child_task->next_child;
else
- task->children = NULL;
+ {
+ parent->children = NULL;
+ if (parent->in_taskwait)
+ gomp_sem_post (&parent->taskwait_sem);
+ }
}
gomp_clear_parent (child_task->children);
to_free = child_task;
@@ -360,5 +422,14 @@ GOMP_taskwait (void)
team->task_count--;
team->task_running_count--;
}
+ else
+ {
+ task->in_taskwait = false;
+ task->prev_queue->next_queue = task->next_queue;
+ task->next_queue->prev_queue = task->prev_queue;
+ if (team->in_taskwait_list == task)
+ team->in_taskwait_list
+ = task->next_queue == task ? NULL : task->next_queue;
+ }
}
}
#include <omp.h>
#include <stdio.h>
long
fib (long n, long l)
{
long i, j;
if (n < 2)
return n;
else if (l)
{
#pragma omp task shared(i) firstprivate(n, l)
i = fib (n - 1, l - 1);
#pragma omp task shared(j) firstprivate(n, l)
j = fib (n - 2, l - 1);
#pragma omp taskwait
}
else
{
i = fib (n - 1, 0);
j = fib (n - 2, 0);
}
return i + j;
}
int
main (int argc, char *argv[])
{
long n = argv[1] ? atoi (argv[1]) : 50;
long l = argv[1] && argv[2] ? atoi (argv[2]) : 10;
double t1, t2;
long result;
omp_set_dynamic (0);
#pragma omp parallel shared(n)
#pragma omp single
{
t1 = omp_get_wtime ();
result = fib (n, l);
t2 = omp_get_wtime ();
printf ("fib (%ld, %ld) %5f %ld\n", n, l, t2 - t1, result);
}
return 0;
}