Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission

Tvrtko Ursulin Mon, 23 Feb 2026 08:25:25 -0800


On 19/02/2026 14:07, Marco Pagani wrote:

Add a new test suite to simulate concurrent job submissions to the DRM
scheduler. The suite includes two initial test cases: (i) a primary test
case for parallel job submission and (ii) a secondary test case for
interleaved job submission and completion. In the first test case, worker
threads concurrently submit jobs to the scheduler and then the timeline is
manually advanced. In the second test case, worker threads periodically
submit a sequence of jobs to the mock scheduler. Periods are chosen as
harmonic, starting from a base period, to allow interleaving between
submission and completion. To avoid impractically large execution times,
periods are cycled. The timeline is advanced automatically when the jobs
completes. This models how job submission from userspace (in process
context) may interleave with job completion (hrtimer callback interrupt
context, in the test case) by a physical device.

Signed-off-by: Marco Pagani <[email protected]>
---
Changes in v1:
- Split the original suite into two test cases (Tvrtko Ursulin).
- General test refactoring and variable renaming for clarity.
- Changed parameters to have a fairer workload distribution.
- Improved cleanup logic.
- Added kunit_info() prints for tracing workers.
---
  drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
  1 file changed, 338 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c 
b/drivers/gpu/drm/scheduler/tests/tests_basic.c
index 82a41a456b0a..391edcbaf43a 100644
--- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
+++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
@@ -2,6 +2,8 @@
  /* Copyright (c) 2025 Valve Corporation */

#include <linux/delay.h>

+#include <linux/completion.h>
+#include <linux/workqueue.h>

#include "sched_tests.h"@@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)

        KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
  }

+struct sched_concurrent_context {

+       struct drm_mock_scheduler *sched;
+       struct workqueue_struct *sub_wq;
+       struct kunit *test;
+       struct completion wait_go;
+};
+
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
+                           struct drm_mock_scheduler *);
+
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, 
drm_mock_sched_entity_free,
+                           struct drm_mock_sched_entity *);
+
+static void complete_destroy_workqueue(void *context)
+{
+       struct sched_concurrent_context *ctx = (struct sched_concurrent_context 
*)context;
+
+       complete_all(&ctx->wait_go);
+
+       destroy_workqueue(ctx->sub_wq);
+}
+
+static int drm_sched_concurrent_init(struct kunit *test)
+{
+       struct sched_concurrent_context *ctx;
+       int ret;
+
+       ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+       KUNIT_ASSERT_NOT_NULL(test, ctx);
+
+       init_completion(&ctx->wait_go);
+
+       ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
+
+       ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, 
ctx->sched);
+       KUNIT_ASSERT_EQ(test, ret, 0);
+
+       /* Use an unbounded workqueue to maximize job submission concurrency */
+       ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
+                                     WQ_UNBOUND_MAX_ACTIVE);
+       KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
+
+       ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
+       KUNIT_ASSERT_EQ(test, ret, 0);
+
+       ctx->test = test;
+       test->priv = ctx;
+
+       return 0;
+}
+
+struct drm_sched_parallel_params {
+       const char *description;
+       unsigned int num_jobs;
+       unsigned int num_workers;
+};
+
+static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
+       {
+               .description = "Workers submitting multiple jobs against a single 
entity",


Each worker has own entity so the description is a bit confusing.

+               .num_jobs = 4,
+               .num_workers = 16,
+       },
+};
+
+static void
+drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char 
*desc)
+{
+       strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, 
drm_sched_parallel_desc);
+
+struct parallel_worker {
+       struct work_struct work;
+       struct sched_concurrent_context *ctx;
+       struct drm_mock_sched_entity *entity;
+       struct drm_mock_sched_job **jobs;
+       unsigned int id;
+};
+
+static void drm_sched_parallel_worker(struct work_struct *work)
+{
+       const struct drm_sched_parallel_params *params;
+       struct sched_concurrent_context *test_ctx;
+       struct parallel_worker *worker;
+       unsigned int i;
+
+       worker = container_of(work, struct parallel_worker, work);
+       test_ctx = worker->ctx;
+       params = test_ctx->test->param_value;
+
+       wait_for_completion(&test_ctx->wait_go);
+
+       kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
+
+       for (i = 0; i < params->num_jobs; i++)
+               drm_mock_sched_job_submit(worker->jobs[i]);
+}
+
+/*
+ * Spawns workers that submit a sequence of jobs to the mock scheduler.
+ * Once all jobs are submitted, the timeline is manually advanced.
+ */
+static void drm_sched_parallel_submit_test(struct kunit *test)
+{
+       struct sched_concurrent_context *ctx = test->priv;
+       const struct drm_sched_parallel_params *params = test->param_value;
+       struct parallel_worker *workers;
+       unsigned int i, j, completed_jobs;
+       bool done;
+       int ret;
+
+       workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
+                               GFP_KERNEL);
+       KUNIT_ASSERT_NOT_NULL(test, workers);
+
+       /*
+        * Init workers only after all jobs and entities have been successfully
+        * allocated. In this way, the cleanup logic for when an assertion fail
+        * can be simplified.
+        */
+       for (i = 0; i < params->num_workers; i++) {
+               workers[i].id = i;
+               workers[i].ctx = ctx;
+               workers[i].entity = drm_mock_sched_entity_new(test,
+                                                             
DRM_SCHED_PRIORITY_NORMAL,
+                                                             ctx->sched);
+
+               ret = kunit_add_action_or_reset(test, 
drm_mock_sched_entity_free_wrap,
+                                               workers[i].entity);
+               KUNIT_ASSERT_EQ(test, ret, 0);
+
+               workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
+                                               sizeof(*workers[i].jobs), 
GFP_KERNEL);
+               KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
+
+               for (j = 0; j < params->num_jobs; j++)
+                       workers[i].jobs[j] = drm_mock_sched_job_new(test,
+                                                                   
workers[i].entity);
+       }
+
+       for (i = 0; i < params->num_workers; i++) {
+               INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
+               queue_work(ctx->sub_wq, &workers[i].work);
+       }
+
+       complete_all(&ctx->wait_go);
+       flush_workqueue(ctx->sub_wq);
+
+       for (i = 0; i < params->num_workers; i++) {
+               for (j = 0; j < params->num_jobs; j++) {
+                       done = 
drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
+                                                                HZ);
+                       KUNIT_ASSERT_TRUE(test, done);
+
+                       done = 
drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_FALSE(test, done);

This second assert does not seem to be bringing much value, but insteadmakes the reader ask themselves why it is there. Remove it?

Hmm in fact this whole loop could be removed. All that it is neededbelow is to wait for the last job to be completed.

+               }
+       }
+
+       completed_jobs = drm_mock_sched_advance(ctx->sched,
+                                               params->num_workers * 
params->num_jobs);
+       KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * 
params->num_jobs);
+
+       for (i = 0; i < params->num_workers; i++) {
+               for (j = 0; j < params->num_jobs; j++) {
+                       done = 
drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_TRUE(test, done);
+               }
+       }


So here, after advancing you just need to wait on the last job to complete.

Not a deal breaker to have it verbose but usually the less is better andeasier to spot the key thing being tested and what are the functional steps.

+}
+
+struct drm_sched_interleaved_params {
+       const char *description;
+       unsigned int job_base_period_us;
+       unsigned int periods_cycle;
+       unsigned int num_jobs;
+       unsigned int num_workers;
+};
+
+static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] 
= {
+       {
+               .description = "Workers submitting single jobs against a single 
entity",


As with the parallel description.

+               .job_base_period_us = 100,
+               .periods_cycle = 10,
+               .num_jobs = 1,
+               .num_workers = 32,
+       },
+       {
+               .description = "Workers submitting multiple jobs against a single 
entity",
+               .job_base_period_us = 100,
+               .periods_cycle = 10,
+               .num_jobs = 4,
+               .num_workers = 16,
+       },
+};
+
+static void
+drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, 
char *desc)
+{
+       strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
+                 drm_sched_interleaved_desc);
+
+struct interleaved_worker {
+       struct work_struct work;
+       struct sched_concurrent_context *ctx;
+       struct drm_mock_sched_entity *entity;
+       struct drm_mock_sched_job **jobs;
+       unsigned int id;
+       unsigned int period_us;
+       unsigned int timeout_us;
+};
+
+static void drm_sched_interleaved_worker(struct work_struct *work)
+{
+       const struct drm_sched_interleaved_params *params;
+       struct sched_concurrent_context *test_ctx;
+       struct interleaved_worker *worker;
+       unsigned int i;
+       bool done;
+
+       worker = container_of(work, struct interleaved_worker, work);
+       test_ctx = worker->ctx;
+       params = test_ctx->test->param_value;
+
+       wait_for_completion(&test_ctx->wait_go);
+
+       kunit_info(test_ctx->test, "Interleaved worker %u with period %u us 
started\n",
+                  worker->id, worker->period_us);
+
+       /* Release jobs as a periodic sequence */
+       for (i = 0; i < params->num_jobs; i++) {
+               drm_mock_sched_job_set_duration_us(worker->jobs[i], 
worker->period_us);
+               drm_mock_sched_job_submit(worker->jobs[i]);
+
+               done = drm_mock_sched_job_wait_finished(worker->jobs[i],
+                                                       
usecs_to_jiffies(worker->timeout_us));
+               if (!done)
+                       kunit_info(test_ctx->test, "Job %u of worker %u 
timedout\n",
+                                  i, worker->id);
+       }
+}
+
+/*
+ * Spawns workers that submit a periodic sequence of jobs to the mock 
scheduler.
+ * Uses harmonic periods to allow interleaving and cycles through them to 
prevent
+ * excessively large execution times. Since the scheduler serializes jobs from 
all
+ * workers, the timeout is set to the hyperperiod with a 2x safety factor. 
Entities
+ * and jobs are pre-allocated in the main thread to avoid using KUnit 
assertions in
+ * the workers.
+ */
+static void drm_sched_interleaved_submit_test(struct kunit *test)
+{
+       struct sched_concurrent_context *ctx = test->priv;
+       const struct drm_sched_interleaved_params *params = test->param_value;
+       struct interleaved_worker *workers;
+       unsigned int period_max_us, timeout_us;
+       unsigned int i, j;
+       bool done;
+       int ret;
+
+       workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
+                               GFP_KERNEL);
+       KUNIT_ASSERT_NOT_NULL(test, workers);
+
+       period_max_us = params->job_base_period_us * (1 << 
params->periods_cycle);
+       timeout_us = params->num_workers * period_max_us * 2;
+
+       /*
+        * Init workers only after all jobs and entities have been successfully
+        * allocated. In this way, the cleanup logic for when an assertion fail
+        * can be simplified.
+        */
+       for (i = 0; i < params->num_workers; i++) {
+               workers[i].id = i;
+               workers[i].ctx = ctx;
+               workers[i].timeout_us = timeout_us;
+
+               if (i % params->periods_cycle == 0)
+                       workers[i].period_us = params->job_base_period_us;
+               else
+                       workers[i].period_us = workers[i - 1].period_us * 2;


Are the two if statements effectively equivalent to:

 period_us = params->job_base_period_us << (i % params->periods_cycle);

?

Also, do you have an idea how to locally calculate a suitableperiods_cycle instead of having to come up with a number in thedrm_sched_interleaved_params array. Just strikes me as hard to know whatto put in there if someone would want to add a test.

+
+               workers[i].entity = drm_mock_sched_entity_new(test,
+                                                             
DRM_SCHED_PRIORITY_NORMAL,
+                                                             ctx->sched);
+
+               ret = kunit_add_action_or_reset(test, 
drm_mock_sched_entity_free_wrap,
+                                               workers[i].entity);
+               KUNIT_ASSERT_EQ(test, ret, 0);
+
+               workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
+                                               sizeof(*workers[i].jobs), 
GFP_KERNEL);
+               KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
+
+               for (j = 0; j < params->num_jobs; j++) {
+                       workers[i].jobs[j] = drm_mock_sched_job_new(test,
+                                                                   
workers[i].entity);
+                       done = 
drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_FALSE(test, done);


Same as above, this is asserted by a basic test case so I'd just remove it.

+               }
+       }
+
+       for (i = 0; i < params->num_workers; i++) {
+               INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
+               queue_work(ctx->sub_wq, &workers[i].work);
+       }
+
+       complete_all(&ctx->wait_go);
+       flush_workqueue(ctx->sub_wq);
+
+       for (i = 0; i < params->num_workers; i++) {
+               for (j = 0; j < params->num_jobs; j++) {
+                       done = 
drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_TRUE(test, done);
+               }
+       }


Here as well you could wait just for the last job per worker.

On the overal submission pattern - Don't you end up with a very unevenactivity between the workers? Because the job duration is doubling andworkers are single-shot, once the low index workers are done they aredone, and all that remains are the higher ones, and so the wave of fewerand fewer active workers continues. Low index worker do not end upracing with the job completions of the high index workers but onlybetween themselves, and even that with the cycle=10 workers=16 case iseven more limited.

Alternative approach could be to set a per test time budget and justkeep the workers submitting until over. It would be simpler tounderstand and there would be more submit/complete overlap.


Regards,

Tvrtko

+}
+
+static struct kunit_case drm_sched_concurrent_tests[] = {
+       KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, 
drm_sched_parallel_gen_params),
+       KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, 
drm_sched_interleaved_gen_params),
+       {}
+};
+
+static struct kunit_suite drm_sched_concurrent = {
+       .name = "drm_sched_concurrent_tests",
+       .init = drm_sched_concurrent_init,
+       .test_cases = drm_sched_concurrent_tests,
+};
+
  static struct kunit_case drm_sched_cancel_tests[] = {
        KUNIT_CASE(drm_sched_basic_cancel),
        {}
@@ -556,6 +893,7 @@ static struct kunit_suite drm_sched_credits = {
  };

kunit_test_suites(&drm_sched_basic,

+                 &drm_sched_concurrent,
                  &drm_sched_timeout,
                  &drm_sched_cancel,
                  &drm_sched_priority,

Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission

Reply via email to