Signed-off-by: Tvrtko Ursulin <[email protected]>
Cc: Christian König <[email protected]>
Cc: Danilo Krummrich <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: Philipp Stanner <[email protected]>
Cc: Pierre-Eric Pelloux-Prayer <[email protected]>
---
drivers/gpu/drm/scheduler/sched_entity.c | 1 +
drivers/gpu/drm/scheduler/sched_internal.h | 15 ++++++++++++---
drivers/gpu/drm/scheduler/sched_main.c | 8 +++++++-
drivers/gpu/drm/scheduler/sched_rq.c | 14 ++++++++++++++
include/drm/gpu_scheduler.h | 5 +++++
5 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 58f51875547a..1715e1caec40 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -61,6 +61,7 @@ static struct drm_sched_entity_stats *drm_sched_entity_stats_alloc(void)
kref_init(&stats->kref);
spin_lock_init(&stats->lock);
+ ewma_drm_sched_avgtime_init(&stats->avg_job_us);
return stats;
}
diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h
index c94e38acc6f2..a120efc5d763 100644
--- a/drivers/gpu/drm/scheduler/sched_internal.h
+++ b/drivers/gpu/drm/scheduler/sched_internal.h
@@ -20,6 +20,7 @@
* @runtime: time entity spent on the GPU.
* @prev_runtime: previous @runtime used to get the runtime delta
* @vruntime: virtual runtime as accumulated by the fair algorithm
+ * @avg_job_us: average job duration in microseconds (EWMA)
*/
struct drm_sched_entity_stats {
struct kref kref;
@@ -27,6 +28,8 @@ struct drm_sched_entity_stats {
ktime_t runtime;
ktime_t prev_runtime;
u64 vruntime;
+
+ struct ewma_drm_sched_avgtime avg_job_us;
};
/* Used to choose between FIFO and RR job-scheduling */
@@ -153,20 +156,26 @@ drm_sched_entity_stats_put(struct drm_sched_entity_stats *stats)
* @job: Scheduler job to account.
*
* Accounts the execution time of @job to its respective entity stats object.
+ *
+ * Returns the job's real duration in microseconds.
*/
-static inline void
+static inline ktime_t
drm_sched_entity_stats_job_add_gpu_time(struct drm_sched_job *job)
{
struct drm_sched_entity_stats *stats = job->entity_stats;
struct drm_sched_fence *s_fence = job->s_fence;
- ktime_t start, end;
+ ktime_t start, end, duration;
start = dma_fence_timestamp(&s_fence->scheduled);
end = dma_fence_timestamp(&s_fence->finished);
+ duration = ktime_sub(end, start);
spin_lock(&stats->lock);
- stats->runtime = ktime_add(stats->runtime, ktime_sub(end, start));
+ stats->runtime = ktime_add(stats->runtime, duration);
+ ewma_drm_sched_avgtime_add(&stats->avg_job_us, ktime_to_us(duration));
spin_unlock(&stats->lock);
+
+ return duration;
}
#endif
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 8d8f9c8411f5..204d99c6699f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1000,7 +1000,12 @@ static void drm_sched_free_job_work(struct work_struct *w)
struct drm_sched_job *job;
while ((job = drm_sched_get_finished_job(sched))) {
- drm_sched_entity_stats_job_add_gpu_time(job);
+ ktime_t duration = drm_sched_entity_stats_job_add_gpu_time(job);
+
+ /* Serialized by the worker. */
+ ewma_drm_sched_avgtime_add(&sched->avg_job_us,
+ ktime_to_us(duration));
+
sched->ops->free_job(job);
}
@@ -1158,6 +1163,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_
atomic_set(&sched->_score, 0);
atomic64_set(&sched->job_id_count, 0);
sched->pause_submit = false;
+ ewma_drm_sched_avgtime_init(&sched->avg_job_us);
sched->ready = true;
return 0;
diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c
index b868c794cc9d..02742869e75b 100644
--- a/drivers/gpu/drm/scheduler/sched_rq.c
+++ b/drivers/gpu/drm/scheduler/sched_rq.c
@@ -150,6 +150,20 @@ drm_sched_entity_restore_vruntime(struct drm_sched_entity *entity,
* Higher priority can go first.
*/
vruntime = -us_to_ktime(rq_prio - prio);
+ } else {
+ struct drm_gpu_scheduler *sched = entity->rq->sched;
+
+ /*
+ * Favour entity with shorter jobs (interactivity).
+ *
+ * (Unlocked read is fine since it is just heuristics.)
+ */
+ if (ewma_drm_sched_avgtime_read(&stats->avg_job_us) <=
+ ewma_drm_sched_avgtime_read(&sched->avg_job_us))
+ vruntime = -1;
+ else
+ vruntime = 1;
}
}
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index bc25508a6ff6..a7e407e04ce0 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -25,11 +25,14 @@
#define _DRM_GPU_SCHEDULER_H_
#include <drm/spsc_queue.h>
+#include <linux/average.h>
#include <linux/dma-fence.h>
#include <linux/completion.h>
#include <linux/xarray.h>
#include <linux/workqueue.h>
+DECLARE_EWMA(drm_sched_avgtime, 6, 4);
+
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
/**
@@ -581,6 +584,7 @@ struct drm_sched_backend_ops {
* @job_id_count: used to assign unique id to the each job.
* @submit_wq: workqueue used to queue @work_run_job and @work_free_job
* @timeout_wq: workqueue used to queue @work_tdr
+ * @avg_job_us: average job duration in microseconds (EWMA)
* @work_run_job: work which calls run_job op of each scheduler.
* @work_free_job: work which calls free_job op of each scheduler.
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
@@ -612,6 +616,7 @@ struct drm_gpu_scheduler {
atomic64_t job_id_count;
struct workqueue_struct *submit_wq;
struct workqueue_struct *timeout_wq;
+ struct ewma_drm_sched_avgtime avg_job_us;
struct work_struct work_run_job;
struct work_struct work_free_job;
struct delayed_work work_tdr;