On Wed, Sep 19, 2018, 9:31 PM Christian König <[email protected]> wrote:
> Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh: > > having a delayed work item per job is redundant as we only need one > > per scheduler to track the time out the currently executing job. > > Well that looks simpler than I thought it would be. > > But it shows the next problem that the timeout and the completion could > race. > > As far as I can see that can be fixed by moving the > dma_fence_remove_callback()/dma_fence_add_callback() dance from > drm_sched_hw_job_reset() to drm_sched_job_timedout(). > > Anyway, I would say drop patch #1 and fix the one comment below and we > can use this. > > > > > Signed-off-by: Nayan Deshmukh <[email protected]> > > Suggested-by: Christian König <[email protected]> > > --- > > drivers/gpu/drm/scheduler/sched_main.c | 16 +++++++++------- > > include/drm/gpu_scheduler.h | 6 +++--- > > 2 files changed, 12 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c > b/drivers/gpu/drm/scheduler/sched_main.c > > index 0e6ccc8243db..f213b5c7f718 100644 > > --- a/drivers/gpu/drm/scheduler/sched_main.c > > +++ b/drivers/gpu/drm/scheduler/sched_main.c > > @@ -198,7 +198,7 @@ static void drm_sched_job_finish(struct work_struct > *work) > > * manages to find this job as the next job in the list, the fence > > * signaled check below will prevent the timeout to be restarted. > > */ > > - cancel_delayed_work_sync(&s_job->work_tdr); > > + cancel_delayed_work_sync(&sched->work_tdr); > > > > spin_lock(&sched->job_list_lock); > > /* queue TDR for next job */ > > @@ -207,7 +207,7 @@ static void drm_sched_job_finish(struct work_struct > *work) > > if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > > !list_is_last(&s_job->node, &sched->ring_mirror_list)) { > > if (!dma_fence_is_signaled(&next->s_fence->finished)) > > Since we now have only one delayed work item we can just drop the test > if next is already signaled. > Can you elaborate more on this. Which test are you talking about? Regards, Nayan > > > Regards, > Christian. > > > - schedule_delayed_work(&next->work_tdr, > sched->timeout); > > + schedule_delayed_work(&sched->work_tdr, > sched->timeout); > > } > > /* remove job from ring_mirror_list */ > > list_del(&s_job->node); > > @@ -237,7 +237,7 @@ static void drm_sched_job_begin(struct drm_sched_job > *s_job) > > if (list_first_entry_or_null(&sched->ring_mirror_list, > > struct drm_sched_job, node) == s_job) { > > if (sched->timeout != MAX_SCHEDULE_TIMEOUT) > > - schedule_delayed_work(&s_job->work_tdr, > sched->timeout); > > + schedule_delayed_work(&sched->work_tdr, > sched->timeout); > > sched->curr_job = s_job; > > } > > spin_unlock(&sched->job_list_lock); > > @@ -245,8 +245,10 @@ static void drm_sched_job_begin(struct > drm_sched_job *s_job) > > > > static void drm_sched_job_timedout(struct work_struct *work) > > { > > - struct drm_sched_job *job = container_of(work, struct > drm_sched_job, > > - work_tdr.work); > > + struct drm_gpu_scheduler *sched = container_of(work, > > + struct drm_gpu_scheduler, > > + work_tdr.work); > > + struct drm_sched_job *job = sched->curr_job; > > > > job->sched->ops->timedout_job(job); > > } > > @@ -318,7 +320,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler > *sched) > > s_job = list_first_entry_or_null(&sched->ring_mirror_list, > > struct drm_sched_job, node); > > if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) > > - schedule_delayed_work(&s_job->work_tdr, sched->timeout); > > + schedule_delayed_work(&sched->work_tdr, sched->timeout); > > if (s_job) > > sched->curr_job = s_job; > > > > @@ -389,7 +391,6 @@ int drm_sched_job_init(struct drm_sched_job *job, > > > > INIT_WORK(&job->finish_work, drm_sched_job_finish); > > INIT_LIST_HEAD(&job->node); > > - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); > > > > return 0; > > } > > @@ -580,6 +581,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > > INIT_LIST_HEAD(&sched->ring_mirror_list); > > spin_lock_init(&sched->job_list_lock); > > atomic_set(&sched->hw_rq_count, 0); > > + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); > > atomic_set(&sched->num_jobs, 0); > > atomic64_set(&sched->job_id_count, 0); > > > > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > > index 07e776b1ca42..9d50d7f3eaa4 100644 > > --- a/include/drm/gpu_scheduler.h > > +++ b/include/drm/gpu_scheduler.h > > @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct > dma_fence *f); > > * finished to remove the job from the > > * @drm_gpu_scheduler.ring_mirror_list. > > * @node: used to append this struct to the > @drm_gpu_scheduler.ring_mirror_list. > > - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after > the timeout > > - * interval is over. > > * @id: a unique id assigned to each job scheduled on the scheduler. > > * @karma: increment on every hang caused by this job. If this exceeds > the hang > > * limit of the scheduler then the job is marked guilty and > will not > > @@ -195,7 +193,6 @@ struct drm_sched_job { > > struct dma_fence_cb finish_cb; > > struct work_struct finish_work; > > struct list_head node; > > - struct delayed_work work_tdr; > > uint64_t id; > > atomic_t karma; > > enum drm_sched_priority s_priority; > > @@ -260,6 +257,8 @@ struct drm_sched_backend_ops { > > * finished. > > * @hw_rq_count: the number of jobs currently in the hardware queue. > > * @job_id_count: used to assign unique id to the each job. > > + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after > the > > + * timeout interval is over. > > * @thread: the kthread on which the scheduler which run. > > * @ring_mirror_list: the list of jobs which are currently in the job > queue. > > * @job_list_lock: lock to protect the ring_mirror_list. > > @@ -280,6 +279,7 @@ struct drm_gpu_scheduler { > > wait_queue_head_t job_scheduled; > > atomic_t hw_rq_count; > > atomic64_t job_id_count; > > + struct delayed_work work_tdr; > > struct task_struct *thread; > > struct list_head ring_mirror_list; > > spinlock_t job_list_lock; > >
_______________________________________________ dri-devel mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/dri-devel
