On Wed, Nov 26, 2025 at 12:19:16PM -0800, Matthew Brost wrote:
> We now have proper infrastructure to accurately check the LRC timestamp
> without toggling the scheduling state for non-VFs. For VFs, it is still
> possible to get an inaccurate view if the context is on hardware. We
> guard against free-running contexts on VFs by banning jobs whose
> timestamps are not moving. In addition, VFs have a timeslice quantum
> that naturally triggers context switches when more than one VF is
> running, thus updating the LRC timestamp.
>
> For multi-queue, it is desirable to avoid scheduling toggling in the TDR
> because this scheduling state is shared among many queues. Furthermore,
> this change simplifies the GuC state machine. The trade-off for VF cases
> seems worthwhile.
>
> v5:
> - Add xe_lrc_timestamp helper (Umesh)
>
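In short, the stuck-timestamp guard the commit message describes amounts to
roughly the following in check_timeout() (a condensed sketch of the hunk
further down, not a verbatim copy of it):

	u32 ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));

	/* Timestamp has not moved since the last TDR pass - ban the job */
	if (ctx_timestamp == job->sample_timestamp)
		return xe_sched_invalidate_job(job, 2);

	/* Remember this sample so the next TDR pass can compare against it */
	job->sample_timestamp = ctx_timestamp;

Since job->sample_timestamp is initialised to U64_MAX at job creation, a
32-bit sample can never match it, so the first TDR pass never bans a job on
this path.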
Ignore this patch; it is broken on a VF. I believe I have a fix, but that
doesn't appear to be working either... I'll have to dig in after the break.

Matt

> Signed-off-by: Matthew Brost <[email protected]>
> ---
> drivers/gpu/drm/xe/xe_guc_submit.c | 97 ++++++-------------------
> drivers/gpu/drm/xe/xe_lrc.c | 42 +++++++----
> drivers/gpu/drm/xe/xe_lrc.h | 3 +-
> drivers/gpu/drm/xe/xe_sched_job.c | 1 +
> drivers/gpu/drm/xe/xe_sched_job_types.h | 2 +
> 5 files changed, 56 insertions(+), 89 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c
> b/drivers/gpu/drm/xe/xe_guc_submit.c
> index db3c57d758c6..b8022826795b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -68,9 +68,7 @@ exec_queue_to_guc(struct xe_exec_queue *q)
> #define EXEC_QUEUE_STATE_KILLED (1 << 7)
> #define EXEC_QUEUE_STATE_WEDGED (1 << 8)
> #define EXEC_QUEUE_STATE_BANNED (1 << 9)
> -#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10)
> -#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 11)
> -#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 12)
> +#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 10)
>
> static bool exec_queue_registered(struct xe_exec_queue *q)
> {
> @@ -202,21 +200,6 @@ static void set_exec_queue_wedged(struct xe_exec_queue
> *q)
> atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
> }
>
> -static bool exec_queue_check_timeout(struct xe_exec_queue *q)
> -{
> - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
> -}
> -
> -static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
> -{
> - atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
> -}
> -
> -static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
> -{
> - atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
> -}
> -
> static bool exec_queue_pending_resume(struct xe_exec_queue *q)
> {
> return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
> @@ -232,21 +215,6 @@ static void clear_exec_queue_pending_resume(struct
> xe_exec_queue *q)
> atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
> }
>
> -static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
> -{
> - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT;
> -}
> -
> -static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
> -{
> - atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
> -}
> -
> -static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
> -{
> - atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
> -}
> -
> static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
> {
> return (atomic_read(&q->guc->state) &
> @@ -1006,7 +974,16 @@ static bool check_timeout(struct xe_exec_queue *q,
> struct xe_sched_job *job)
> return xe_sched_invalidate_job(job, 2);
> }
>
> - ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
> + ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
> + if (ctx_timestamp == job->sample_timestamp) {
> + xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u,
> guc_id=%d, timestamp stuck",
> + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
> + q->guc->id);
> +
> + return xe_sched_invalidate_job(job, 2);
> + }
> +
> + job->sample_timestamp = ctx_timestamp;
> ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
>
> /*
> @@ -1132,16 +1109,17 @@ guc_exec_queue_timedout_job(struct drm_sched_job
> *drm_job)
> }
>
> /*
> - * XXX: Sampling timeout doesn't work in wedged mode as we have to
> - * modify scheduling state to read timestamp. We could read the
> - * timestamp from a register to accumulate current running time but this
> - * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
> - * genuine timeouts.
> + * Check if job is actually timed out, if so restart job execution and
> TDR
> */
> + if (!skip_timeout_check && !check_timeout(q, job))
> + goto rearm;
> +
> if (!exec_queue_killed(q))
> wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
>
> - /* Engine state now stable, disable scheduling to check timestamp */
> + set_exec_queue_banned(q);
> +
> + /* Kick job / queue off hardware */
> if (!wedged && (exec_queue_enabled(q) ||
> exec_queue_pending_disable(q))) {
> int ret;
>
> @@ -1163,13 +1141,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job
> *drm_job)
> if (!ret || xe_guc_read_stopped(guc))
> goto trigger_reset;
>
> - /*
> - * Flag communicates to G2H handler that schedule
> - * disable originated from a timeout check. The G2H then
> - * avoid triggering cleanup or deregistering the exec
> - * queue.
> - */
> - set_exec_queue_check_timeout(q);
> disable_scheduling(q, skip_timeout_check);
> }
>
> @@ -1198,22 +1169,12 @@ guc_exec_queue_timedout_job(struct drm_sched_job
> *drm_job)
> xe_devcoredump(q, job,
> "Schedule disable failed to respond,
> guc_id=%d, ret=%d, guc_read=%d",
> q->guc->id, ret,
> xe_guc_read_stopped(guc));
> - set_exec_queue_banned(q);
> xe_gt_reset_async(q->gt);
> xe_sched_tdr_queue_imm(sched);
> goto rearm;
> }
> }
>
> - /*
> - * Check if job is actually timed out, if so restart job execution and
> TDR
> - */
> - if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
> - !exec_queue_reset(q) && exec_queue_registered(q)) {
> - clear_exec_queue_check_timeout(q);
> - goto sched_enable;
> - }
> -
> if (q->vm && q->vm->xef) {
> process_name = q->vm->xef->process_name;
> pid = q->vm->xef->pid;
> @@ -1244,14 +1205,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job
> *drm_job)
> if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
> (q->flags & EXEC_QUEUE_FLAG_VM &&
> !exec_queue_killed(q)))) {
> if (!xe_sched_invalidate_job(job, 2)) {
> - clear_exec_queue_check_timeout(q);
> xe_gt_reset_async(q->gt);
> goto rearm;
> }
> }
>
> - set_exec_queue_banned(q);
> -
> /* Mark all outstanding jobs as bad, thus completing them */
> xe_sched_job_set_error(job, err);
> drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
> @@ -1266,9 +1224,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job
> *drm_job)
> */
> return DRM_GPU_SCHED_STAT_NO_HANG;
>
> -sched_enable:
> - set_exec_queue_pending_tdr_exit(q);
> - enable_scheduling(q);
> rearm:
> /*
> * XXX: Ideally want to adjust timeout based on current execution time
> @@ -1898,8 +1853,7 @@ static void
> guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
> q->guc->id);
> }
>
> - if (pending_enable && !pending_resume &&
> - !exec_queue_pending_tdr_exit(q)) {
> + if (pending_enable && !pending_resume) {
> clear_exec_queue_registered(q);
> xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
> q->guc->id);
> @@ -1908,7 +1862,6 @@ static void
> guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
> if (pending_enable) {
> clear_exec_queue_enabled(q);
> clear_exec_queue_pending_resume(q);
> - clear_exec_queue_pending_tdr_exit(q);
> clear_exec_queue_pending_enable(q);
> xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
> q->guc->id);
> @@ -1934,7 +1887,6 @@ static void
> guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
> if (!pending_enable)
> set_exec_queue_enabled(q);
> clear_exec_queue_pending_disable(q);
> - clear_exec_queue_check_timeout(q);
> xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
> q->guc->id);
> }
> @@ -2274,13 +2226,10 @@ static void handle_sched_done(struct xe_guc *guc,
> struct xe_exec_queue *q,
>
> q->guc->resume_time = ktime_get();
> clear_exec_queue_pending_resume(q);
> - clear_exec_queue_pending_tdr_exit(q);
> clear_exec_queue_pending_enable(q);
> smp_wmb();
> wake_up_all(&guc->ct.wq);
> } else {
> - bool check_timeout = exec_queue_check_timeout(q);
> -
> xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
> xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
>
> @@ -2288,11 +2237,11 @@ static void handle_sched_done(struct xe_guc *guc,
> struct xe_exec_queue *q,
> suspend_fence_signal(q);
> clear_exec_queue_pending_disable(q);
> } else {
> - if (exec_queue_banned(q) || check_timeout) {
> + if (exec_queue_banned(q)) {
> smp_wmb();
> wake_up_all(&guc->ct.wq);
> }
> - if (!check_timeout && exec_queue_destroyed(q)) {
> + if (exec_queue_destroyed(q)) {
> /*
> * Make sure to clear the pending_disable only
> * after sampling the destroyed state. We want
> @@ -2402,7 +2351,7 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc,
> u32 *msg, u32 len)
> * guc_exec_queue_timedout_job.
> */
> set_exec_queue_reset(q);
> - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
> + if (!exec_queue_banned(q))
> xe_guc_exec_queue_trigger_cleanup(q);
>
> return 0;
> @@ -2483,7 +2432,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct
> xe_guc *guc, u32 *msg,
>
> /* Treat the same as engine reset */
> set_exec_queue_reset(q);
> - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
> + if (!exec_queue_banned(q))
> xe_guc_exec_queue_trigger_cleanup(q);
>
> return 0;
> diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
> index b5083c99dd50..c9bfd11a8d5e 100644
> --- a/drivers/gpu/drm/xe/xe_lrc.c
> +++ b/drivers/gpu/drm/xe/xe_lrc.c
> @@ -839,7 +839,7 @@ u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
> *
> * Returns: ctx timestamp value
> */
> -u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
> +static u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
> {
> struct xe_device *xe = lrc_to_xe(lrc);
> struct iosys_map map;
> @@ -2353,35 +2353,29 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32
> engine_id, u64 *reg_ctx_ts)
> }
>
> /**
> - * xe_lrc_update_timestamp() - Update ctx timestamp
> + * xe_lrc_timestamp() - Current ctx timestamp
> * @lrc: Pointer to the lrc.
> - * @old_ts: Old timestamp value
> *
> - * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
> - * update saved value. With support for active contexts, the calculation may
> be
> - * slightly racy, so follow a read-again logic to ensure that the context is
> - * still active before returning the right timestamp.
> + * Return latest ctx timestamp.
> *
> * Returns: New ctx timestamp value
> */
> -u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
> +u64 xe_lrc_timestamp(struct xe_lrc *lrc)
> {
> - u64 lrc_ts, reg_ts;
> + u64 lrc_ts, reg_ts, new_ts;
> u32 engine_id;
>
> - *old_ts = lrc->ctx_timestamp;
> -
> lrc_ts = xe_lrc_ctx_timestamp(lrc);
> /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
> if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
> - lrc->ctx_timestamp = lrc_ts;
> + new_ts = lrc_ts;
> goto done;
> }
>
> if (lrc_ts == CONTEXT_ACTIVE) {
> engine_id = xe_lrc_engine_id(lrc);
> if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
> - lrc->ctx_timestamp = reg_ts;
> + new_ts = reg_ts;
>
> /* read lrc again to ensure context is still active */
> lrc_ts = xe_lrc_ctx_timestamp(lrc);
> @@ -2392,9 +2386,29 @@ u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64
> *old_ts)
> * be a separate if condition.
> */
> if (lrc_ts != CONTEXT_ACTIVE)
> - lrc->ctx_timestamp = lrc_ts;
> + new_ts = lrc_ts;
>
> done:
> + return new_ts;
> +}
> +
> +/**
> + * xe_lrc_update_timestamp() - Update ctx timestamp
> + * @lrc: Pointer to the lrc.
> + * @old_ts: Old timestamp value
> + *
> + * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
> + * update saved value. With support for active contexts, the calculation may
> be
> + * slightly racy, so follow a read-again logic to ensure that the context is
> + * still active before returning the right timestamp.
> + *
> + * Returns: New ctx timestamp value
> + */
> +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
> +{
> + *old_ts = lrc->ctx_timestamp;
> + lrc->ctx_timestamp = xe_lrc_timestamp(lrc);
> +
> trace_xe_lrc_update_timestamp(lrc, *old_ts);
>
> return lrc->ctx_timestamp;
> diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
> index 2fb628da5c43..86b7174f424a 100644
> --- a/drivers/gpu/drm/xe/xe_lrc.h
> +++ b/drivers/gpu/drm/xe/xe_lrc.h
> @@ -140,7 +140,6 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot
> *snapshot);
>
> u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc);
> u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc);
> -u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
> u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
> u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
> int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine
> *hwe,
> @@ -160,4 +159,6 @@ int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc,
> struct xe_hw_engine *hwe
> */
> u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts);
>
> +u64 xe_lrc_timestamp(struct xe_lrc *lrc);
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c
> b/drivers/gpu/drm/xe/xe_sched_job.c
> index cb674a322113..39aec7f6d86d 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -110,6 +110,7 @@ struct xe_sched_job *xe_sched_job_create(struct
> xe_exec_queue *q,
> return ERR_PTR(-ENOMEM);
>
> job->q = q;
> + job->sample_timestamp = U64_MAX;
> kref_init(&job->refcount);
> xe_exec_queue_get(job->q);
>
> diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h
> b/drivers/gpu/drm/xe/xe_sched_job_types.h
> index 7c4c54fe920a..13c2970e81a8 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> @@ -59,6 +59,8 @@ struct xe_sched_job {
> u32 lrc_seqno;
> /** @migrate_flush_flags: Additional flush flags for migration jobs */
> u32 migrate_flush_flags;
> + /** @sample_timestamp: Sampling of job timestamp in TDR */
> + u64 sample_timestamp;
> /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload.
> */
> bool ring_ops_flush_tlb;
> /** @ggtt: mapped in ggtt. */
> --
> 2.34.1
>
