After commit f7fe64ad0f22 ("drm/sched: Split free_job into own work item"),
and with drivers that use an unordered workqueue, sched_jobs can be freed
in parallel as soon as complete_all(&entity->entity_idle) is called.
This makes all dereferencing in the lower part of the worker unsafe, so
let's fix it by moving the complete_all() call to after the worker is done
touching the job.

Signed-off-by: Tvrtko Ursulin <[email protected]>
Fixes: f7fe64ad0f22 ("drm/sched: Split free_job into own work item")
Cc: Christian König <[email protected]>
Cc: Danilo Krummrich <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: Philipp Stanner <[email protected]>
Cc: <[email protected]> # v6.8+
---
 drivers/gpu/drm/scheduler/sched_main.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 57da84908752..f0d02c061c23 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1188,7 +1188,6 @@ static void drm_sched_run_job_work(struct work_struct *w)
                container_of(w, struct drm_gpu_scheduler, work_run_job);
        struct drm_sched_entity *entity;
        struct dma_fence *fence;
-       struct drm_sched_fence *s_fence;
        struct drm_sched_job *sched_job;
        int r;
 
@@ -1207,15 +1206,12 @@ static void drm_sched_run_job_work(struct work_struct *w)
                return;
        }
 
-       s_fence = sched_job->s_fence;
-
        atomic_add(sched_job->credits, &sched->credit_count);
        drm_sched_job_begin(sched_job);
 
        trace_drm_run_job(sched_job, entity);
        fence = sched->ops->run_job(sched_job);
-       complete_all(&entity->entity_idle);
-       drm_sched_fence_scheduled(s_fence, fence);
+       drm_sched_fence_scheduled(sched_job->s_fence, fence);
 
        if (!IS_ERR_OR_NULL(fence)) {
                /* Drop for original kref_init of the fence */
@@ -1232,6 +1228,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
                                   PTR_ERR(fence) : 0);
        }
 
+       complete_all(&entity->entity_idle);
        wake_up(&sched->job_scheduled);
        drm_sched_run_job_queue(sched);
 }
-- 
2.47.1

Reply via email to