On 30/05/2025 15:01, Maíra Canal wrote:
Add a test to submit a single job against a scheduler with the timeout
configured and verify that if the job is still running, the timeout
handler will skip the reset and allow the job to complete.

Signed-off-by: Maíra Canal <[email protected]>
---
  drivers/gpu/drm/scheduler/tests/mock_scheduler.c |  5 +++
  drivers/gpu/drm/scheduler/tests/sched_tests.h    |  1 +
  drivers/gpu/drm/scheduler/tests/tests_basic.c    | 43 ++++++++++++++++++++++++
  3 files changed, 49 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c 
b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
index 
fdf5f34b39e02c8a8648d8bea566a27fd3251516..39429f5cd19ee3c23816f257d566b47d3daa4baa
 100644
--- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
+++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
@@ -208,6 +208,11 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job)
        job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT;

+       if (job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET) {
+               job->flags &= ~DRM_MOCK_SCHED_JOB_DONT_RESET;

If it isn't important to clear the flag I would consider omitting it.

+               return DRM_GPU_SCHED_STAT_NO_HANG;
+       }
+
        return DRM_GPU_SCHED_STAT_RESET;
  }
diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h
index 
27caf8285fb74b9f3c9ce2daa1c44d4a0c967e92..5259f181e55387c41efbcd3f6addc9465331d787
 100644
--- a/drivers/gpu/drm/scheduler/tests/sched_tests.h
+++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h
@@ -98,6 +98,7 @@ struct drm_mock_sched_job {
#define DRM_MOCK_SCHED_JOB_DONE 0x1
  #define DRM_MOCK_SCHED_JOB_TIMEDOUT   0x2
+#define DRM_MOCK_SCHED_JOB_DONT_RESET  0x4
        unsigned long           flags;
struct list_head link;
diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c 
b/drivers/gpu/drm/scheduler/tests/tests_basic.c
index 
41c648782f4548e202bd8711b45d28eead9bd0b2..2ba2d1b0c3cad9626ab9d89cfae05244c670a826
 100644
--- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
+++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
@@ -246,8 +246,51 @@ static void drm_sched_basic_timeout(struct kunit *test)
        drm_mock_sched_entity_free(entity);
  }
+static void drm_sched_skip_reset(struct kunit *test)
+{
+       struct drm_mock_scheduler *sched = test->priv;
+       struct drm_mock_sched_entity *entity;
+       struct drm_mock_sched_job *job;
+       bool done;
+
+       /*
+        * Submit a single job against a scheduler with the timeout configured
+        * and verify that if the job is still running, the timeout handler
+        * will skip the reset and allow the job to complete.
+        */
+
+       entity = drm_mock_sched_entity_new(test,
+                                          DRM_SCHED_PRIORITY_NORMAL,
+                                          sched);
+       job = drm_mock_sched_job_new(test, entity);
+
+       job->flags = DRM_MOCK_SCHED_JOB_DONT_RESET;
+
+       drm_mock_sched_job_set_duration_us(job, jiffies_to_usecs(2 * 
MOCK_TIMEOUT));

It might be easier not to set the duration and instead advance the job manually after the timeout assert. That would be one less time-based interaction.

+       drm_mock_sched_job_submit(job);
+
+       done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT);
+       KUNIT_ASSERT_FALSE(test, done);
+
+       KUNIT_ASSERT_EQ(test,
+                       job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT,
+                       DRM_MOCK_SCHED_JOB_TIMEDOUT);
+
+       KUNIT_ASSERT_EQ(test,
+                       job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET,
+                       0);


Wait_finished for 200ms is equal to the configured job timeout so could this be a bit racy? Safer to wait for 2 * MOCK_TIMEOUT I think.

(I also wonder whether I should have made the flags bit operations atomic so the visibility between CPU cores running different threads is guaranteed. I might follow up with that tweak.)

+
+       KUNIT_ASSERT_FALSE(test, list_empty(&sched->job_list));

Going back to my first comment - if you remove the set_duration and, instead of this assert, do KUNIT_ASSERT_EQ(drm_mock_sched_advance(), 1), I think that should be good enough and simpler.

Regards,

Tvrtko

+
+       done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT);
+       KUNIT_ASSERT_TRUE(test, done);
+
+       drm_mock_sched_entity_free(entity);
+}
+
  static struct kunit_case drm_sched_timeout_tests[] = {
        KUNIT_CASE(drm_sched_basic_timeout),
+       KUNIT_CASE(drm_sched_skip_reset),
        {}
  };

Reply via email to