Add a new test suite to simulate concurrent job submissions to the DRM
scheduler. The suite includes two initial test cases: (i) a primary test
case for parallel job submission and (ii) a secondary test case for
interleaved job submission and completion. In the first test case, worker
threads concurrently submit jobs to the scheduler and then the timeline is
manually advanced. In the second test case, worker threads periodically
submit a sequence of jobs to the mock scheduler. Periods are chosen as
harmonic, starting from a base period, to allow interleaving between
submission and completion. To avoid impractically large execution times,
periods are cycled. The timeline is advanced automatically when the jobs
complete. This models how job submission from userspace (in process
context) may interleave with job completion (hrtimer callback interrupt
context, in the test case) by a physical device.

Signed-off-by: Marco Pagani <[email protected]>
---
Changes in v1:
- Split the original suite into two test cases (Tvrtko Ursulin).
- General test refactoring and variable renaming for clarity.
- Changed parameters to have a fairer workload distribution.
- Improved cleanup logic.
- Added kunit_info() prints for tracing workers.
---
 drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
 1 file changed, 338 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
index 82a41a456b0a..391edcbaf43a 100644
--- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
+++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2025 Valve Corporation */
 
 #include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/workqueue.h>
 
 #include "sched_tests.h"
 
@@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
        KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
 }
 
+/*
+ * Shared state for the concurrent submission tests. Allocated by
+ * drm_sched_concurrent_init() and stored in test->priv.
+ */
+struct sched_concurrent_context {
+       /* Mock scheduler that all workers submit to */
+       struct drm_mock_scheduler *sched;
+       /* Workqueue on which the submitter workers are queued */
+       struct workqueue_struct *sub_wq;
+       /* Owning KUnit test; workers read its parameters and log through it */
+       struct kunit *test;
+       /* Start gate: workers block on it until the test releases them */
+       struct completion wait_go;
+};
+
+/*
+ * Adapters that let drm_mock_sched_fini() and drm_mock_sched_entity_free()
+ * be registered as KUnit deferred actions.
+ */
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
+                           struct drm_mock_scheduler *);
+
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
+                           struct drm_mock_sched_entity *);
+
+/*
+ * KUnit cleanup action for the submission workqueue.
+ *
+ * Completes wait_go first so that any worker still blocked in
+ * wait_for_completion() is released, then destroys the workqueue.
+ *
+ * @context: the struct sched_concurrent_context of the test.
+ */
+static void complete_destroy_workqueue(void *context)
+{
+       struct sched_concurrent_context *ctx = context;
+
+       complete_all(&ctx->wait_go);
+
+       destroy_workqueue(ctx->sub_wq);
+}
+
+/*
+ * Suite init hook: allocates the shared context, creates the mock scheduler
+ * and the submission workqueue, and registers a cleanup action for each.
+ *
+ * Returns 0. Setup failures are reported through KUnit assertions.
+ */
+static int drm_sched_concurrent_init(struct kunit *test)
+{
+       struct sched_concurrent_context *ctx;
+       int ret;
+
+       ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+       KUNIT_ASSERT_NOT_NULL(test, ctx);
+
+       init_completion(&ctx->wait_go);
+
+       ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
+
+       ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
+       KUNIT_ASSERT_EQ(test, ret, 0);
+
+       /* Use an unbound workqueue to maximize job submission concurrency */
+       ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
+                                     WQ_UNBOUND_MAX_ACTIVE);
+       KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
+
+       ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
+       KUNIT_ASSERT_EQ(test, ret, 0);
+
+       ctx->test = test;
+       test->priv = ctx;
+
+       return 0;
+}
+
+/* Parameters for one run of the parallel submission test */
+struct drm_sched_parallel_params {
+       /* Human-readable name of the parameterized case */
+       const char *description;
+       /* Number of jobs submitted by each worker */
+       unsigned int num_jobs;
+       /* Number of submitter workers */
+       unsigned int num_workers;
+};
+
+/* Parameter table for drm_sched_parallel_submit_test() */
+static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
+       {
+               .description = "Workers submitting multiple jobs against a single entity",
+               .num_jobs = 4,
+               .num_workers = 16,
+       },
+};
+
+/* Copies the case description into the KUnit parameter description buffer */
+static void
+drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
+{
+       strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
+
+/* Per-worker state for the parallel submission test */
+struct parallel_worker {
+       /* Work item queued on the submission workqueue */
+       struct work_struct work;
+       /* Shared test context */
+       struct sched_concurrent_context *ctx;
+       /* Entity the jobs of this worker are created against */
+       struct drm_mock_sched_entity *entity;
+       /* Array of pre-allocated jobs submitted by this worker */
+       struct drm_mock_sched_job **jobs;
+       /* Worker index, used for logging */
+       unsigned int id;
+};
+
+/*
+ * Work item body for the parallel test: block on the shared start gate,
+ * then submit every pre-allocated job of this worker back to back.
+ */
+static void drm_sched_parallel_worker(struct work_struct *work)
+{
+       struct parallel_worker *self =
+               container_of(work, struct parallel_worker, work);
+       struct sched_concurrent_context *cctx = self->ctx;
+       const struct drm_sched_parallel_params *p = cctx->test->param_value;
+       unsigned int n = 0;
+
+       wait_for_completion(&cctx->wait_go);
+
+       kunit_info(cctx->test, "Parallel worker %u started\n", self->id);
+
+       while (n < p->num_jobs)
+               drm_mock_sched_job_submit(self->jobs[n++]);
+}
+
+/*
+ * Spawns workers that submit a sequence of jobs to the mock scheduler.
+ * Once all jobs are submitted, the timeline is manually advanced.
+ *
+ * Each worker gets its own entity and num_jobs pre-allocated jobs. The test
+ * checks that every job is scheduled but not finished before the advance,
+ * and finished after it.
+ */
+static void drm_sched_parallel_submit_test(struct kunit *test)
+{
+       struct sched_concurrent_context *ctx = test->priv;
+       const struct drm_sched_parallel_params *params = test->param_value;
+       struct parallel_worker *workers;
+       unsigned int i, j, total_jobs, completed_jobs;
+       bool done;
+       int ret;
+
+       total_jobs = params->num_workers * params->num_jobs;
+
+       workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
+                               GFP_KERNEL);
+       KUNIT_ASSERT_NOT_NULL(test, workers);
+
+       /*
+        * Init workers only after all jobs and entities have been successfully
+        * allocated. In this way, the cleanup logic for when an assertion fails
+        * can be simplified.
+        */
+       for (i = 0; i < params->num_workers; i++) {
+               workers[i].id = i;
+               workers[i].ctx = ctx;
+               workers[i].entity =
+                       drm_mock_sched_entity_new(test,
+                                                 DRM_SCHED_PRIORITY_NORMAL,
+                                                 ctx->sched);
+
+               ret = kunit_add_action_or_reset(test,
+                                               drm_mock_sched_entity_free_wrap,
+                                               workers[i].entity);
+               KUNIT_ASSERT_EQ(test, ret, 0);
+
+               workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
+                                               sizeof(*workers[i].jobs),
+                                               GFP_KERNEL);
+               KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
+
+               for (j = 0; j < params->num_jobs; j++)
+                       workers[i].jobs[j] =
+                               drm_mock_sched_job_new(test, workers[i].entity);
+       }
+
+       for (i = 0; i < params->num_workers; i++) {
+               INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
+               queue_work(ctx->sub_wq, &workers[i].work);
+       }
+
+       /* Open the start gate, then wait for all queued workers to return */
+       complete_all(&ctx->wait_go);
+       flush_workqueue(ctx->sub_wq);
+
+       /* Before the advance: every job scheduled, none finished */
+       for (i = 0; i < params->num_workers; i++) {
+               for (j = 0; j < params->num_jobs; j++) {
+                       done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
+                                                                HZ);
+                       KUNIT_ASSERT_TRUE(test, done);
+
+                       done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_FALSE(test, done);
+               }
+       }
+
+       completed_jobs = drm_mock_sched_advance(ctx->sched, total_jobs);
+       KUNIT_ASSERT_EQ(test, completed_jobs, total_jobs);
+
+       /* After the advance: every job finished */
+       for (i = 0; i < params->num_workers; i++) {
+               for (j = 0; j < params->num_jobs; j++) {
+                       done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_TRUE(test, done);
+               }
+       }
+}
+
+/* Parameters for one run of the interleaved submission test */
+struct drm_sched_interleaved_params {
+       /* Human-readable name of the parameterized case */
+       const char *description;
+       /* Job period, in microseconds, given to the first worker of each cycle */
+       unsigned int job_base_period_us;
+       /* Number of workers after which the period restarts from the base */
+       unsigned int periods_cycle;
+       /* Number of jobs submitted by each worker */
+       unsigned int num_jobs;
+       /* Number of submitter workers */
+       unsigned int num_workers;
+};
+
+/* Parameter table for drm_sched_interleaved_submit_test() */
+static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
+       {
+               .description = "Workers submitting single jobs against a single entity",
+               .job_base_period_us = 100,
+               .periods_cycle = 10,
+               .num_jobs = 1,
+               .num_workers = 32,
+       },
+       {
+               .description = "Workers submitting multiple jobs against a single entity",
+               .job_base_period_us = 100,
+               .periods_cycle = 10,
+               .num_jobs = 4,
+               .num_workers = 16,
+       },
+};
+
+/* Copies the case description into the KUnit parameter description buffer */
+static void
+drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
+{
+       strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
+                 drm_sched_interleaved_desc);
+
+/* Per-worker state for the interleaved submission test */
+struct interleaved_worker {
+       /* Work item queued on the submission workqueue */
+       struct work_struct work;
+       /* Shared test context */
+       struct sched_concurrent_context *ctx;
+       /* Entity the jobs of this worker are created against */
+       struct drm_mock_sched_entity *entity;
+       /* Array of pre-allocated jobs submitted by this worker */
+       struct drm_mock_sched_job **jobs;
+       /* Worker index, used for logging */
+       unsigned int id;
+       /* Duration, in microseconds, set on each job of this worker */
+       unsigned int period_us;
+       /* Maximum time, in microseconds, to wait for each job to finish */
+       unsigned int timeout_us;
+};
+
+/*
+ * Work item body for the interleaved test. After the start gate opens, the
+ * worker submits its jobs one at a time, waiting for each one to finish (or
+ * for the wait to time out) before submitting the next. A timeout is only
+ * logged here; no KUnit assertion is made from the worker.
+ */
+static void drm_sched_interleaved_worker(struct work_struct *work)
+{
+       const struct drm_sched_interleaved_params *params;
+       struct sched_concurrent_context *test_ctx;
+       struct interleaved_worker *worker;
+       unsigned int i;
+       bool done;
+
+       worker = container_of(work, struct interleaved_worker, work);
+       test_ctx = worker->ctx;
+       params = test_ctx->test->param_value;
+
+       wait_for_completion(&test_ctx->wait_go);
+
+       kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
+                  worker->id, worker->period_us);
+
+       /* Release jobs as a periodic sequence */
+       for (i = 0; i < params->num_jobs; i++) {
+               drm_mock_sched_job_set_duration_us(worker->jobs[i],
+                                                  worker->period_us);
+               drm_mock_sched_job_submit(worker->jobs[i]);
+
+               done = drm_mock_sched_job_wait_finished(worker->jobs[i],
+                                                       usecs_to_jiffies(worker->timeout_us));
+               if (!done)
+                       kunit_info(test_ctx->test, "Job %u of worker %u timedout\n",
+                                  i, worker->id);
+       }
+}
+
+/*
+ * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
+ * Uses harmonic periods to allow interleaving and cycles through them to prevent
+ * excessively large execution times. Since the scheduler serializes jobs from all
+ * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
+ * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
+ * the workers.
+ */
+static void drm_sched_interleaved_submit_test(struct kunit *test)
+{
+       struct sched_concurrent_context *ctx = test->priv;
+       const struct drm_sched_interleaved_params *params = test->param_value;
+       struct interleaved_worker *workers;
+       unsigned int period_max_us, timeout_us;
+       unsigned int i, j;
+       bool done;
+       int ret;
+
+       workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
+                               GFP_KERNEL);
+       KUNIT_ASSERT_NOT_NULL(test, workers);
+
+       /*
+        * Upper bound on any period assigned below: the period doubles at most
+        * periods_cycle - 1 times before it is reset to the base period.
+        */
+       period_max_us = params->job_base_period_us *
+                       (1 << params->periods_cycle);
+       timeout_us = params->num_workers * period_max_us * 2;
+
+       /*
+        * Init workers only after all jobs and entities have been successfully
+        * allocated. In this way, the cleanup logic for when an assertion fails
+        * can be simplified.
+        */
+       for (i = 0; i < params->num_workers; i++) {
+               workers[i].id = i;
+               workers[i].ctx = ctx;
+               workers[i].timeout_us = timeout_us;
+
+               /* Harmonic periods: restart from the base every periods_cycle */
+               if (i % params->periods_cycle == 0)
+                       workers[i].period_us = params->job_base_period_us;
+               else
+                       workers[i].period_us = workers[i - 1].period_us * 2;
+
+               workers[i].entity =
+                       drm_mock_sched_entity_new(test,
+                                                 DRM_SCHED_PRIORITY_NORMAL,
+                                                 ctx->sched);
+
+               ret = kunit_add_action_or_reset(test,
+                                               drm_mock_sched_entity_free_wrap,
+                                               workers[i].entity);
+               KUNIT_ASSERT_EQ(test, ret, 0);
+
+               workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
+                                               sizeof(*workers[i].jobs),
+                                               GFP_KERNEL);
+               KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
+
+               for (j = 0; j < params->num_jobs; j++) {
+                       workers[i].jobs[j] =
+                               drm_mock_sched_job_new(test, workers[i].entity);
+                       done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_FALSE(test, done);
+               }
+       }
+
+       for (i = 0; i < params->num_workers; i++) {
+               INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
+               queue_work(ctx->sub_wq, &workers[i].work);
+       }
+
+       /* Open the start gate, then wait for all queued workers to return */
+       complete_all(&ctx->wait_go);
+       flush_workqueue(ctx->sub_wq);
+
+       /* Every job must have finished by the time the workers are done */
+       for (i = 0; i < params->num_workers; i++) {
+               for (j = 0; j < params->num_jobs; j++) {
+                       done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+                       KUNIT_ASSERT_TRUE(test, done);
+               }
+       }
+}
+
+/* Parameterized test cases of the concurrent submission suite */
+static struct kunit_case drm_sched_concurrent_tests[] = {
+       KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, drm_sched_parallel_gen_params),
+       KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, drm_sched_interleaved_gen_params),
+       {}
+};
+
+/*
+ * Concurrent job submission suite: the parallel and interleaved test cases,
+ * with drm_sched_concurrent_init() as the init hook.
+ */
+static struct kunit_suite drm_sched_concurrent = {
+       .name = "drm_sched_concurrent_tests",
+       .init = drm_sched_concurrent_init,
+       .test_cases = drm_sched_concurrent_tests,
+};
+
 static struct kunit_case drm_sched_cancel_tests[] = {
        KUNIT_CASE(drm_sched_basic_cancel),
        {}
@@ -556,6 +893,7 @@ static struct kunit_suite drm_sched_credits = {
 };
 
 kunit_test_suites(&drm_sched_basic,
+                 &drm_sched_concurrent,
                  &drm_sched_timeout,
                  &drm_sched_cancel,
                  &drm_sched_priority,
-- 
2.52.0

Reply via email to