Chris Wilson <[email protected]> writes:

> When using a global seqno, we required a precise stop-the-world event to
> handle preemption and unwind the global seqno counter. To accomplish
> this, we would preempt to a special out-of-band context and wait for the
> machine to report that it was idle. Given an idle machine, we could very
> precisely see which requests had completed and which we needed to feed
> back into the run queue.
>
> However, now that we have scrapped the global seqno, we no longer need
> to precisely unwind the global counter and only track requests by their
> per-context seqno. This allows us to loosely unwind inflight requests
> while scheduling a preemption, with the enormous caveat that the
> requests we put back on the run queue are still _inflight_ (until the
> preemption request is complete). This makes request tracking much more
> messy, as at any point then we can see a completed request that we
> believe is not currently scheduled for execution. We also have to be
> careful not to rewind RING_TAIL past RING_HEAD on preempting to the
> running context, and for this we use a semaphore to prevent completion
> of the request before continuing.
>
> To accomplish this feat, we change how we track requests scheduled to
> the HW. Instead of appending our requests onto a single list as we
> submit, we track each submission to ELSP as its own block. Then upon
> receiving the CS preemption event, we promote the pending block to the
> inflight block (discarding what was previously being tracked). As normal
> CS completion events arrive, we then remove stale entries from the
> inflight tracker.
>
> Signed-off-by: Chris Wilson <[email protected]>
> ---
> Skip the bonus asserts if the context is already completed.
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   |   2 +-
>  drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  61 +-
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  63 +-
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  52 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 678 ++++++++----------
>  drivers/gpu/drm/i915/i915_gpu_error.c         |  19 +-
>  drivers/gpu/drm/i915/i915_request.c           |   6 +
>  drivers/gpu/drm/i915/i915_request.h           |   1 +
>  drivers/gpu/drm/i915/i915_scheduler.c         |   3 +-
>  drivers/gpu/drm/i915/i915_utils.h             |  12 +
>  drivers/gpu/drm/i915/intel_guc_submission.c   | 175 ++---
>  drivers/gpu/drm/i915/selftests/i915_request.c |   8 +-
>  13 files changed, 475 insertions(+), 610 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 0f2c22a3bcb6..35871c8a42a6 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -646,7 +646,7 @@ static void init_contexts(struct drm_i915_private *i915)
>  
>  static bool needs_preempt_context(struct drm_i915_private *i915)
>  {
> -     return HAS_EXECLISTS(i915);
> +     return USES_GUC_SUBMISSION(i915);
>  }
>  
>  int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 08049ee91cee..4c0e211c715d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -13,6 +13,7 @@
>  #include <linux/types.h>
>  
>  #include "i915_active_types.h"
> +#include "i915_utils.h"
>  #include "intel_engine_types.h"
>  #include "intel_sseu.h"
>  
> @@ -38,6 +39,10 @@ struct intel_context {
>       struct i915_gem_context *gem_context;
>       struct intel_engine_cs *engine;
>       struct intel_engine_cs *inflight;
> +#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
> +#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
> +#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
> +#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)

Just curious: what do you consider to be the advantages of carrying
this info packed into the pointer?
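
My reading of the packing: the low 2 bits of ce->inflight carry the
number of times the context is currently in flight on the ELSP ports,
the rest is the engine pointer, i.e. (just restating the macros above):

        struct intel_engine_cs *engine =
                ptr_mask_bits(ce->inflight, 2);    /* intel_context_inflight() */
        unsigned long count =
                ptr_unmask_bits(ce->inflight, 2);  /* intel_context_inflight_count() */

versus carrying an explicit { engine, count } pair in intel_context.
Is it to keep the whole state a single pointer-sized load/store, or
something else?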

>  
>       struct list_head signal_link;
>       struct list_head signals;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
> b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 2f1c6871ee95..9bb6ff76680e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -125,71 +125,26 @@ hangcheck_action_to_str(const enum 
> intel_engine_hangcheck_action a)
>  
>  void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
>  
> -static inline void
> -execlists_set_active(struct intel_engine_execlists *execlists,
> -                  unsigned int bit)
> -{
> -     __set_bit(bit, (unsigned long *)&execlists->active);
> -}
> -
> -static inline bool
> -execlists_set_active_once(struct intel_engine_execlists *execlists,
> -                       unsigned int bit)
> -{
> -     return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
> -}
> -
> -static inline void
> -execlists_clear_active(struct intel_engine_execlists *execlists,
> -                    unsigned int bit)
> -{
> -     __clear_bit(bit, (unsigned long *)&execlists->active);
> -}
> -
> -static inline void
> -execlists_clear_all_active(struct intel_engine_execlists *execlists)
> +static inline unsigned int
> +execlists_num_ports(const struct intel_engine_execlists * const execlists)
>  {
> -     execlists->active = 0;
> +     return execlists->port_mask + 1;
>  }
>  
> -static inline bool
> -execlists_is_active(const struct intel_engine_execlists *execlists,
> -                 unsigned int bit)
> +static inline struct i915_request *
> +execlists_active(const struct intel_engine_execlists *execlists)
>  {
> -     return test_bit(bit, (unsigned long *)&execlists->active);
> +     GEM_BUG_ON(execlists->active - execlists->inflight >
> +                execlists_num_ports(execlists));
> +     return READ_ONCE(*execlists->active);
>  }
>  
> -void execlists_user_begin(struct intel_engine_execlists *execlists,
> -                       const struct execlist_port *port);
> -void execlists_user_end(struct intel_engine_execlists *execlists);
> -
>  void
>  execlists_cancel_port_requests(struct intel_engine_execlists * const 
> execlists);
>  
>  struct i915_request *
>  execlists_unwind_incomplete_requests(struct intel_engine_execlists 
> *execlists);
>  
> -static inline unsigned int
> -execlists_num_ports(const struct intel_engine_execlists * const execlists)
> -{
> -     return execlists->port_mask + 1;
> -}
> -
> -static inline struct execlist_port *
> -execlists_port_complete(struct intel_engine_execlists * const execlists,
> -                     struct execlist_port * const port)
> -{
> -     const unsigned int m = execlists->port_mask;
> -
> -     GEM_BUG_ON(port_index(port, execlists) != 0);
> -     GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
> -
> -     memmove(port, port + 1, m * sizeof(struct execlist_port));
> -     memset(port + m, 0, sizeof(struct execlist_port));
> -
> -     return port;
> -}
> -
>  static inline u32
>  intel_read_status_page(const struct intel_engine_cs *engine, int reg)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 7fd33e81c2d9..d45328e254dc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -508,6 +508,10 @@ void intel_engine_init_execlists(struct intel_engine_cs 
> *engine)
>       GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
>       GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>  
> +     memset(execlists->pending, 0, sizeof(execlists->pending));
> +     execlists->active =
> +             memset(execlists->inflight, 0, sizeof(execlists->inflight));
> +
>       execlists->queue_priority_hint = INT_MIN;
>       execlists->queue = RB_ROOT_CACHED;
>  }
> @@ -1152,7 +1156,7 @@ bool intel_engine_is_idle(struct intel_engine_cs 
> *engine)
>               return true;
>  
>       /* Waiting to drain ELSP? */
> -     if (READ_ONCE(engine->execlists.active)) {
> +     if (execlists_active(&engine->execlists)) {
>               struct tasklet_struct *t = &engine->execlists.tasklet;
>  
>               synchronize_hardirq(engine->i915->drm.irq);
> @@ -1169,7 +1173,7 @@ bool intel_engine_is_idle(struct intel_engine_cs 
> *engine)
>               /* Otherwise flush the tasklet if it was on another cpu */
>               tasklet_unlock_wait(t);
>  
> -             if (READ_ONCE(engine->execlists.active))
> +             if (execlists_active(&engine->execlists))
>                       return false;
>       }
>  
> @@ -1367,6 +1371,7 @@ static void intel_engine_print_registers(struct 
> intel_engine_cs *engine,
>       }
>  
>       if (HAS_EXECLISTS(dev_priv)) {
> +             struct i915_request * const *port, *rq;
>               const u32 *hws =
>                       &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
>               const u8 num_entries = execlists->csb_size;
> @@ -1399,27 +1404,33 @@ static void intel_engine_print_registers(struct 
> intel_engine_cs *engine,
>               }
>  
>               spin_lock_irqsave(&engine->active.lock, flags);
> -             for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
> -                     struct i915_request *rq;
> -                     unsigned int count;
> +             for (port = execlists->active; (rq = *port); port++) {
> +                     char hdr[80];
> +                     int len;
> +
> +                     len = snprintf(hdr, sizeof(hdr),
> +                                    "\t\tActive[%d: ",
> +                                    (int)(port - execlists->active));
> +                     if (!i915_request_signaled(rq))
> +                             len += snprintf(hdr + len, sizeof(hdr) - len,
> +                                             "ring:{start:%08x, hwsp:%08x, 
> seqno:%08x}, ",
> +                                             i915_ggtt_offset(rq->ring->vma),
> +                                             rq->timeline->hwsp_offset,
> +                                             hwsp_seqno(rq));
> +                     snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
> +                     print_request(m, rq, hdr);
> +             }
> +             for (port = execlists->pending; (rq = *port); port++) {
>                       char hdr[80];
>  
> -                     rq = port_unpack(&execlists->port[idx], &count);
> -                     if (!rq) {
> -                             drm_printf(m, "\t\tELSP[%d] idle\n", idx);
> -                     } else if (!i915_request_signaled(rq)) {
> -                             snprintf(hdr, sizeof(hdr),
> -                                      "\t\tELSP[%d] count=%d, 
> ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
> -                                      idx, count,
> -                                      i915_ggtt_offset(rq->ring->vma),
> -                                      rq->timeline->hwsp_offset,
> -                                      hwsp_seqno(rq));
> -                             print_request(m, rq, hdr);
> -                     } else {
> -                             print_request(m, rq, "\t\tELSP[%d] rq: ");
> -                     }
> +                     snprintf(hdr, sizeof(hdr),
> +                              "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, 
> seqno:%08x}, rq: ",
> +                              (int)(port - execlists->pending),
> +                              i915_ggtt_offset(rq->ring->vma),
> +                              rq->timeline->hwsp_offset,
> +                              hwsp_seqno(rq));
> +                     print_request(m, rq, hdr);
>               }
> -             drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
>               spin_unlock_irqrestore(&engine->active.lock, flags);
>       } else if (INTEL_GEN(dev_priv) > 6) {
>               drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
> @@ -1583,15 +1594,19 @@ int intel_enable_engine_stats(struct intel_engine_cs 
> *engine)
>       }
>  
>       if (engine->stats.enabled++ == 0) {
> -             const struct execlist_port *port = execlists->port;
> -             unsigned int num_ports = execlists_num_ports(execlists);
> +             struct i915_request * const *port;
> +             struct i915_request *rq;
>  
>               engine->stats.enabled_at = ktime_get();
>  
>               /* XXX submission method oblivious? */
> -             while (num_ports-- && port_isset(port)) {
> +             for (port = execlists->active; (rq = *port); port++)
>                       engine->stats.active++;
> -                     port++;
> +
> +             for (port = execlists->pending; (rq = *port); port++) {
> +                     /* Exclude any contexts already counted in active */
> +                     if (intel_context_inflight_count(rq->hw_context) == 1)
> +                             engine->stats.active++;
>               }
>  
>               if (engine->stats.active)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 43e975a26016..411b7a807b99 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -172,51 +172,10 @@ struct intel_engine_execlists {
>        */
>       u32 __iomem *ctrl_reg;
>  
> -     /**
> -      * @port: execlist port states
> -      *
> -      * For each hardware ELSP (ExecList Submission Port) we keep
> -      * track of the last request and the number of times we submitted
> -      * that port to hw. We then count the number of times the hw reports
> -      * a context completion or preemption. As only one context can
> -      * be active on hw, we limit resubmission of context to port[0]. This
> -      * is called Lite Restore, of the context.
> -      */
> -     struct execlist_port {
> -             /**
> -              * @request_count: combined request and submission count
> -              */
> -             struct i915_request *request_count;
> -#define EXECLIST_COUNT_BITS 2
> -#define port_request(p) ptr_mask_bits((p)->request_count, 
> EXECLIST_COUNT_BITS)
> -#define port_count(p) ptr_unmask_bits((p)->request_count, 
> EXECLIST_COUNT_BITS)
> -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
> -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, 
> EXECLIST_COUNT_BITS)
> -#define port_set(p, packed) ((p)->request_count = (packed))
> -#define port_isset(p) ((p)->request_count)
> -#define port_index(p, execlists) ((p) - (execlists)->port)
> -
> -             /**
> -              * @context_id: context ID for port
> -              */
> -             GEM_DEBUG_DECL(u32 context_id);
> -
>  #define EXECLIST_MAX_PORTS 2
> -     } port[EXECLIST_MAX_PORTS];
> -
> -     /**
> -      * @active: is the HW active? We consider the HW as active after
> -      * submitting any context for execution and until we have seen the
> -      * last context completion event. After that, we do not expect any
> -      * more events until we submit, and so can park the HW.
> -      *
> -      * As we have a small number of different sources from which we feed
> -      * the HW, we track the state of each inside a single bitfield.
> -      */
> -     unsigned int active;
> -#define EXECLISTS_ACTIVE_USER 0
> -#define EXECLISTS_ACTIVE_PREEMPT 1
> -#define EXECLISTS_ACTIVE_HWACK 2
> +     struct i915_request * const *active;
> +     struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
> +     struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
>  
>       /**
>        * @port_mask: number of execlist ports - 1
> @@ -257,11 +216,6 @@ struct intel_engine_execlists {
>        */
>       u32 *csb_status;
>  
> -     /**
> -      * @preempt_complete_status: expected CSB upon completing preemption
> -      */
> -     u32 preempt_complete_status;
> -
>       /**
>        * @csb_size: context status buffer FIFO size
>        */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 82b7ace62d97..b1e45c651660 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -161,6 +161,8 @@
>  #define GEN8_CTX_STATUS_COMPLETED_MASK \
>        (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>  
> +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
> +
>  /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>  #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>  #define WA_TAIL_DWORDS 2
> @@ -221,6 +223,14 @@ static void execlists_init_reg_state(u32 *reg_state,
>                                    struct intel_engine_cs *engine,
>                                    struct intel_ring *ring);
>  
> +static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> +{
> +     return (i915_ggtt_offset(engine->status_page.vma) +
> +             I915_GEM_HWS_PREEMPT_ADDR);
> +}
> +
> +#define ring_pause(E) ((E)->status_page.addr[I915_GEM_HWS_PREEMPT])

Scary. Please let's make ring_pause() a proper function and use
intel_write_status_page() in it.

So I guess you already have, and you want to squeeze the latency
fruit.

When we have everything in place, CI is green and everyone is happy,
can we then tear it down?
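
To be concrete, I was thinking of something along these lines
(untested sketch, name made up):

static inline void ring_set_pause(struct intel_engine_cs *engine, u32 state)
{
        intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, state);
}

and the dequeue/process_csb/reset sites would then call
ring_set_pause(engine, 1) / ring_set_pause(engine, 0) instead of
assigning through the macro. The CS side still polls the very same
dword via the MI_SEMAPHORE_WAIT, so only the CPU write side changes.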

> +
>  static inline struct i915_priolist *to_priolist(struct rb_node *rb)
>  {
>       return rb_entry(rb, struct i915_priolist, node);
> @@ -271,12 +281,6 @@ static inline bool need_preempt(const struct 
> intel_engine_cs *engine,
>  {
>       int last_prio;
>  
> -     if (!engine->preempt_context)
> -             return false;
> -
> -     if (i915_request_completed(rq))
> -             return false;
> -
>       /*
>        * Check if the current priority hint merits a preemption attempt.
>        *
> @@ -338,9 +342,6 @@ __maybe_unused static inline bool
>  assert_priority_queue(const struct i915_request *prev,
>                     const struct i915_request *next)
>  {
> -     const struct intel_engine_execlists *execlists =
> -             &prev->engine->execlists;
> -
>       /*
>        * Without preemption, the prev may refer to the still active element
>        * which we refuse to let go.
> @@ -348,7 +349,7 @@ assert_priority_queue(const struct i915_request *prev,
>        * Even with preemption, there are times when we think it is better not
>        * to preempt and leave an ostensibly lower priority request in flight.
>        */
> -     if (port_request(execlists->port) == prev)
> +     if (i915_request_is_active(prev))
>               return true;
>  
>       return rq_prio(prev) >= rq_prio(next);
> @@ -442,13 +443,11 @@ __unwind_incomplete_requests(struct intel_engine_cs 
> *engine)
>               struct intel_engine_cs *owner;
>  
>               if (i915_request_completed(rq))
> -                     break;
> +                     continue; /* XXX */

Yeah, but what is the plan with the XXX?


>  
>               __i915_request_unsubmit(rq);
>               unwind_wa_tail(rq);
>  
> -             GEM_BUG_ON(rq->hw_context->inflight);
> -
>               /*
>                * Push the request back into the queue for later resubmission.
>                * If this request is not native to this physical engine (i.e.
> @@ -500,32 +499,32 @@ execlists_context_status_change(struct i915_request 
> *rq, unsigned long status)
>                                  status, rq);
>  }
>  
> -inline void
> -execlists_user_begin(struct intel_engine_execlists *execlists,
> -                  const struct execlist_port *port)
> +static inline struct i915_request *
> +execlists_schedule_in(struct i915_request *rq, int idx)
>  {
> -     execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
> -}
> +     struct intel_context *ce = rq->hw_context;
> +     int count;
>  
> -inline void
> -execlists_user_end(struct intel_engine_execlists *execlists)
> -{
> -     execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> -}
> +     trace_i915_request_in(rq, idx);
>  
> -static inline void
> -execlists_context_schedule_in(struct i915_request *rq)
> -{
> -     GEM_BUG_ON(rq->hw_context->inflight);
> +     count = intel_context_inflight_count(ce);
> +     if (!count) {
> +             intel_context_get(ce);
> +             ce->inflight = rq->engine;
> +
> +             execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> +             intel_engine_context_in(ce->inflight);
> +     }
>  
> -     execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> -     intel_engine_context_in(rq->engine);
> -     rq->hw_context->inflight = rq->engine;
> +     intel_context_inflight_inc(ce);
> +     GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
> +
> +     return i915_request_get(rq);
>  }
>  
> -static void kick_siblings(struct i915_request *rq)
> +static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
>  {
> -     struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
> +     struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
>       struct i915_request *next = READ_ONCE(ve->request);
>  
>       if (next && next->execution_mask & ~rq->execution_mask)
> @@ -533,29 +532,42 @@ static void kick_siblings(struct i915_request *rq)
>  }
>  
>  static inline void
> -execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
> +execlists_schedule_out(struct i915_request *rq)
>  {
> -     rq->hw_context->inflight = NULL;
> -     intel_engine_context_out(rq->engine);
> -     execlists_context_status_change(rq, status);
> +     struct intel_context *ce = rq->hw_context;
> +
> +     GEM_BUG_ON(!intel_context_inflight_count(ce));
> +
>       trace_i915_request_out(rq);
>  
> -     /*
> -      * If this is part of a virtual engine, its next request may have
> -      * been blocked waiting for access to the active context. We have
> -      * to kick all the siblings again in case we need to switch (e.g.
> -      * the next request is not runnable on this engine). Hopefully,
> -      * we will already have submitted the next request before the
> -      * tasklet runs and do not need to rebuild each virtual tree
> -      * and kick everyone again.
> -      */
> -     if (rq->engine != rq->hw_context->engine)
> -             kick_siblings(rq);
> +     intel_context_inflight_dec(ce);
> +     if (!intel_context_inflight_count(ce)) {
> +             intel_engine_context_out(ce->inflight);
> +             execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> +
> +             ce->inflight = NULL;
> +             intel_context_put(ce);
> +
> +             /*
> +              * If this is part of a virtual engine, its next request may
> +              * have been blocked waiting for access to the active context.
> +              * We have to kick all the siblings again in case we need to
> +              * switch (e.g. the next request is not runnable on this
> +              * engine). Hopefully, we will already have submitted the next
> +              * request before the tasklet runs and do not need to rebuild
> +              * each virtual tree and kick everyone again.
> +              */
> +             if (rq->engine != ce->engine)
> +                     kick_siblings(rq, ce);
> +     }
> +
> +     i915_request_put(rq);
>  }
>  
> -static u64 execlists_update_context(struct i915_request *rq)
> +static u64 execlists_update_context(const struct i915_request *rq)
>  {
>       struct intel_context *ce = rq->hw_context;
> +     u64 desc;
>  
>       ce->lrc_reg_state[CTX_RING_TAIL + 1] =
>               intel_ring_set_tail(rq->ring, rq->tail);
> @@ -576,7 +588,11 @@ static u64 execlists_update_context(struct i915_request 
> *rq)
>        * wmb).
>        */
>       mb();
> -     return ce->lrc_desc;
> +
> +     desc = ce->lrc_desc;
> +     ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
> +
> +     return desc;
>  }
>  
>  static inline void write_desc(struct intel_engine_execlists *execlists, u64 
> desc, u32 port)
> @@ -590,12 +606,59 @@ static inline void write_desc(struct 
> intel_engine_execlists *execlists, u64 desc
>       }
>  }
>  
> +static __maybe_unused void
> +trace_ports(const struct intel_engine_execlists *execlists,
> +         const char *msg,
> +         struct i915_request * const *ports)
> +{
> +     const struct intel_engine_cs *engine =
> +             container_of(execlists, typeof(*engine), execlists);
> +
> +     GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
> +               engine->name, msg,
> +               ports[0]->fence.context,
> +               ports[0]->fence.seqno,
> +               i915_request_completed(ports[0]) ? "!" :
> +               i915_request_started(ports[0]) ? "*" :
> +               "",
> +               ports[1] ? ports[1]->fence.context : 0,
> +               ports[1] ? ports[1]->fence.seqno : 0);
> +}
> +
> +static __maybe_unused bool
> +assert_pending_valid(const struct intel_engine_execlists *execlists,
> +                  const char *msg)
> +{
> +     struct i915_request * const *port, *rq;
> +     struct intel_context *ce = NULL;
> +
> +     trace_ports(execlists, msg, execlists->pending);
> +
> +     if (execlists->pending[execlists_num_ports(execlists)])
> +             return false;
> +
> +     for (port = execlists->pending; (rq = *port); port++) {
> +             if (ce == rq->hw_context)
> +                     return false;
> +
> +             ce = rq->hw_context;
> +             if (i915_request_completed(rq))
> +                     continue;
> +
> +             GEM_BUG_ON(i915_active_is_idle(&ce->active));
> +             GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
> +     }
> +
> +     return ce;
> +}
> +
>  static void execlists_submit_ports(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlists *execlists = &engine->execlists;
> -     struct execlist_port *port = execlists->port;
>       unsigned int n;
>  
> +     GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
> +
>       /*
>        * We can skip acquiring intel_runtime_pm_get() here as it was taken
>        * on our behalf by the request (see i915_gem_mark_busy()) and it will
> @@ -613,38 +676,16 @@ static void execlists_submit_ports(struct 
> intel_engine_cs *engine)
>        * of elsq entries, keep this in mind before changing the loop below.
>        */
>       for (n = execlists_num_ports(execlists); n--; ) {
> -             struct i915_request *rq;
> -             unsigned int count;
> -             u64 desc;
> +             struct i915_request *rq = execlists->pending[n];
>  
> -             rq = port_unpack(&port[n], &count);
> -             if (rq) {
> -                     GEM_BUG_ON(count > !n);
> -                     if (!count++)
> -                             execlists_context_schedule_in(rq);
> -                     port_set(&port[n], port_pack(rq, count));
> -                     desc = execlists_update_context(rq);
> -                     GEM_DEBUG_EXEC(port[n].context_id = 
> upper_32_bits(desc));
> -
> -                     GEM_TRACE("%s in[%d]:  ctx=%d.%d, fence %llx:%lld 
> (current %d), prio=%d\n",
> -                               engine->name, n,
> -                               port[n].context_id, count,
> -                               rq->fence.context, rq->fence.seqno,
> -                               hwsp_seqno(rq),
> -                               rq_prio(rq));
> -             } else {
> -                     GEM_BUG_ON(!n);
> -                     desc = 0;
> -             }
> -
> -             write_desc(execlists, desc, n);
> +             write_desc(execlists,
> +                        rq ? execlists_update_context(rq) : 0,
> +                        n);
>       }
>  
>       /* we need to manually load the submit queue */
>       if (execlists->ctrl_reg)
>               writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> -
> -     execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>  }
>  
>  static bool ctx_single_port_submission(const struct intel_context *ce)
> @@ -668,6 +709,7 @@ static bool can_merge_ctx(const struct intel_context 
> *prev,
>  static bool can_merge_rq(const struct i915_request *prev,
>                        const struct i915_request *next)
>  {
> +     GEM_BUG_ON(prev == next);
>       GEM_BUG_ON(!assert_priority_queue(prev, next));
>  
>       if (!can_merge_ctx(prev->hw_context, next->hw_context))
> @@ -676,58 +718,6 @@ static bool can_merge_rq(const struct i915_request *prev,
>       return true;
>  }
>  
> -static void port_assign(struct execlist_port *port, struct i915_request *rq)
> -{
> -     GEM_BUG_ON(rq == port_request(port));
> -
> -     if (port_isset(port))
> -             i915_request_put(port_request(port));
> -
> -     port_set(port, port_pack(i915_request_get(rq), port_count(port)));
> -}
> -
> -static void inject_preempt_context(struct intel_engine_cs *engine)
> -{
> -     struct intel_engine_execlists *execlists = &engine->execlists;
> -     struct intel_context *ce = engine->preempt_context;
> -     unsigned int n;
> -
> -     GEM_BUG_ON(execlists->preempt_complete_status !=
> -                upper_32_bits(ce->lrc_desc));
> -
> -     /*
> -      * Switch to our empty preempt context so
> -      * the state of the GPU is known (idle).
> -      */
> -     GEM_TRACE("%s\n", engine->name);
> -     for (n = execlists_num_ports(execlists); --n; )
> -             write_desc(execlists, 0, n);
> -
> -     write_desc(execlists, ce->lrc_desc, n);
> -
> -     /* we need to manually load the submit queue */
> -     if (execlists->ctrl_reg)
> -             writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> -
> -     execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
> -     execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> -
> -     (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
> -}
> -
> -static void complete_preempt_context(struct intel_engine_execlists 
> *execlists)
> -{
> -     GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
> -
> -     if (inject_preempt_hang(execlists))
> -             return;
> -
> -     execlists_cancel_port_requests(execlists);
> -     __unwind_incomplete_requests(container_of(execlists,
> -                                               struct intel_engine_cs,
> -                                               execlists));
> -}
> -
>  static void virtual_update_register_offsets(u32 *regs,
>                                           struct intel_engine_cs *engine)
>  {
> @@ -792,7 +782,7 @@ static bool virtual_matches(const struct virtual_engine 
> *ve,
>        * we reuse the register offsets). This is a very small
>        * hystersis on the greedy seelction algorithm.
>        */
> -     inflight = READ_ONCE(ve->context.inflight);
> +     inflight = intel_context_inflight(&ve->context);
>       if (inflight && inflight != engine)
>               return false;
>  
> @@ -815,13 +805,23 @@ static void virtual_xfer_breadcrumbs(struct 
> virtual_engine *ve,
>       spin_unlock(&old->breadcrumbs.irq_lock);
>  }
>  
> +static struct i915_request *
> +last_active(const struct intel_engine_execlists *execlists)
> +{
> +     struct i915_request * const *last = execlists->active;
> +
> +     while (*last && i915_request_completed(*last))
> +             last++;
> +
> +     return *last;
> +}
> +
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlists * const execlists = &engine->execlists;
> -     struct execlist_port *port = execlists->port;
> -     const struct execlist_port * const last_port =
> -             &execlists->port[execlists->port_mask];
> -     struct i915_request *last = port_request(port);
> +     struct i915_request **port = execlists->pending;
> +     struct i915_request ** const last_port = port + execlists->port_mask;
> +     struct i915_request *last;
>       struct rb_node *rb;
>       bool submit = false;
>  
> @@ -867,65 +867,72 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>               break;
>       }
>  
> +     /*
> +      * If the queue is higher priority than the last
> +      * request in the currently active context, submit afresh.
> +      * We will resubmit again afterwards in case we need to split
> +      * the active context to interject the preemption request,
> +      * i.e. we will retrigger preemption following the ack in case
> +      * of trouble.
> +      */
> +     last = last_active(execlists);
>       if (last) {
> -             /*
> -              * Don't resubmit or switch until all outstanding
> -              * preemptions (lite-restore) are seen. Then we
> -              * know the next preemption status we see corresponds
> -              * to this ELSP update.
> -              */
> -             GEM_BUG_ON(!execlists_is_active(execlists,
> -                                             EXECLISTS_ACTIVE_USER));
> -             GEM_BUG_ON(!port_count(&port[0]));
> -
> -             /*
> -              * If we write to ELSP a second time before the HW has had
> -              * a chance to respond to the previous write, we can confuse
> -              * the HW and hit "undefined behaviour". After writing to ELSP,
> -              * we must then wait until we see a context-switch event from
> -              * the HW to indicate that it has had a chance to respond.
> -              */
> -             if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> -                     return;
> -
>               if (need_preempt(engine, last, rb)) {
> -                     inject_preempt_context(engine);
> -                     return;
> -             }
> +                     GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, 
> hint=%d\n",
> +                               engine->name,
> +                               last->fence.context,
> +                               last->fence.seqno,
> +                               last->sched.attr.priority,
> +                               execlists->queue_priority_hint);
> +                     /*
> +                      * Don't let the RING_HEAD advance past the breadcrumb
> +                      * as we unwind (and until we resubmit) so that we do
> +                      * not accidentally tell it to go backwards.
> +                      */
> +                     ring_pause(engine) = 1;
>  
> -             /*
> -              * In theory, we could coalesce more requests onto
> -              * the second port (the first port is active, with
> -              * no preemptions pending). However, that means we
> -              * then have to deal with the possible lite-restore
> -              * of the second port (as we submit the ELSP, there
> -              * may be a context-switch) but also we may complete
> -              * the resubmission before the context-switch. Ergo,
> -              * coalescing onto the second port will cause a
> -              * preemption event, but we cannot predict whether
> -              * that will affect port[0] or port[1].
> -              *
> -              * If the second port is already active, we can wait
> -              * until the next context-switch before contemplating
> -              * new requests. The GPU will be busy and we should be
> -              * able to resubmit the new ELSP before it idles,
> -              * avoiding pipeline bubbles (momentary pauses where
> -              * the driver is unable to keep up the supply of new
> -              * work). However, we have to double check that the
> -              * priorities of the ports haven't been switch.
> -              */
> -             if (port_count(&port[1]))
> -                     return;
> +                     /*
> +                      * Note that we have not stopped the GPU at this point,
> +                      * so we are unwinding the incomplete requests as they
> +                      * remain inflight and so by the time we do complete
> +                      * the preemption, some of the unwound requests may
> +                      * complete!
> +                      */
> +                     __unwind_incomplete_requests(engine);
>  
> -             /*
> -              * WaIdleLiteRestore:bdw,skl
> -              * Apply the wa NOOPs to prevent
> -              * ring:HEAD == rq:TAIL as we resubmit the
> -              * request. See gen8_emit_fini_breadcrumb() for
> -              * where we prepare the padding after the
> -              * end of the request.
> -              */
> -             last->tail = last->wa_tail;
> +                     /*
> +                      * If we need to return to the preempted context, we
> +                      * need to skip the lite-restore and force it to
> +                      * reload the RING_TAIL. Otherwise, the HW has a
> +                      * tendency to ignore us rewinding the TAIL to the
> +                      * end of an earlier request.
> +                      */
> +                     last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> +                     last = NULL;
> +             } else {
> +                     /*
> +                      * Otherwise if we already have a request pending
> +                      * for execution after the current one, we can
> +                      * just wait until the next CS event before
> +                      * queuing more. In either case we will force a
> +                      * lite-restore preemption event, but if we wait
> +                      * we hopefully coalesce several updates into a single
> +                      * submission.
> +                      */
> +                     if (!list_is_last(&last->sched.link,
> +                                       &engine->active.requests))
> +                             return;
> +
> +                     /*
> +                      * WaIdleLiteRestore:bdw,skl
> +                      * Apply the wa NOOPs to prevent
> +                      * ring:HEAD == rq:TAIL as we resubmit the
> +                      * request. See gen8_emit_fini_breadcrumb() for
> +                      * where we prepare the padding after the
> +                      * end of the request.
> +                      */
> +                     last->tail = last->wa_tail;
> +             }
>       }
>  
>       while (rb) { /* XXX virtual is always taking precedence */
> @@ -955,9 +962,24 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>                               continue;
>                       }
>  
> +                     if (i915_request_completed(rq)) {
> +                             ve->request = NULL;
> +                             ve->base.execlists.queue_priority_hint = 
> INT_MIN;
> +                             rb_erase_cached(rb, &execlists->virtual);
> +                             RB_CLEAR_NODE(rb);
> +
> +                             rq->engine = engine;
> +                             __i915_request_submit(rq);
> +
> +                             spin_unlock(&ve->base.active.lock);
> +
> +                             rb = rb_first_cached(&execlists->virtual);
> +                             continue;
> +                     }
> +
>                       if (last && !can_merge_rq(last, rq)) {
>                               spin_unlock(&ve->base.active.lock);
> -                             return; /* leave this rq for another engine */
> +                             return; /* leave this for another */
>                       }
>  
>                       GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? 
> %s\n",
> @@ -1006,9 +1028,10 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>                       }
>  
>                       __i915_request_submit(rq);
> -                     trace_i915_request_in(rq, port_index(port, execlists));
> -                     submit = true;
> -                     last = rq;
> +                     if (!i915_request_completed(rq)) {
> +                             submit = true;
> +                             last = rq;
> +                     }
>               }
>  
>               spin_unlock(&ve->base.active.lock);
> @@ -1021,6 +1044,9 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>               int i;
>  
>               priolist_for_each_request_consume(rq, rn, p, i) {
> +                     if (i915_request_completed(rq))
> +                             goto skip;
> +
>                       /*
>                        * Can we combine this request with the current port?
>                        * It has to be the same context/ringbuffer and not
> @@ -1060,19 +1086,14 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>                                   ctx_single_port_submission(rq->hw_context))
>                                       goto done;
>  
> -
> -                             if (submit)
> -                                     port_assign(port, last);
> +                             *port = execlists_schedule_in(last, port - 
> execlists->pending);
>                               port++;
> -
> -                             GEM_BUG_ON(port_isset(port));
>                       }
>  
> -                     __i915_request_submit(rq);
> -                     trace_i915_request_in(rq, port_index(port, execlists));
> -
>                       last = rq;
>                       submit = true;
> +skip:
> +                     __i915_request_submit(rq);
>               }
>  
>               rb_erase_cached(&p->node, &execlists->queue);
> @@ -1097,54 +1118,30 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>        * interrupt for secondary ports).
>        */
>       execlists->queue_priority_hint = queue_prio(execlists);
> +     GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
> +               engine->name, execlists->queue_priority_hint,
> +               yesno(submit));
>  
>       if (submit) {
> -             port_assign(port, last);
> +             *port = execlists_schedule_in(last, port - execlists->pending);
> +             memset(port + 1, 0, (last_port - port) * sizeof(*port));
>               execlists_submit_ports(engine);
>       }
> -
> -     /* We must always keep the beast fed if we have work piled up */
> -     GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
> -                !port_isset(execlists->port));
> -
> -     /* Re-evaluate the executing context setup after each preemptive kick */
> -     if (last)
> -             execlists_user_begin(execlists, execlists->port);
> -
> -     /* If the engine is now idle, so should be the flag; and vice versa. */
> -     GEM_BUG_ON(execlists_is_active(&engine->execlists,
> -                                    EXECLISTS_ACTIVE_USER) ==
> -                !port_isset(engine->execlists.port));
>  }
>  
>  void
>  execlists_cancel_port_requests(struct intel_engine_execlists * const 
> execlists)
>  {
> -     struct execlist_port *port = execlists->port;
> -     unsigned int num_ports = execlists_num_ports(execlists);
> -
> -     while (num_ports-- && port_isset(port)) {
> -             struct i915_request *rq = port_request(port);
> -
> -             GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
> -                       rq->engine->name,
> -                       (unsigned int)(port - execlists->port),
> -                       rq->fence.context, rq->fence.seqno,
> -                       hwsp_seqno(rq));
> +     struct i915_request * const *port, *rq;
>  
> -             GEM_BUG_ON(!execlists->active);
> -             execlists_context_schedule_out(rq,
> -                                            i915_request_completed(rq) ?
> -                                            INTEL_CONTEXT_SCHEDULE_OUT :
> -                                            
> INTEL_CONTEXT_SCHEDULE_PREEMPTED);
> +     for (port = execlists->pending; (rq = *port); port++)
> +             execlists_schedule_out(rq);
> +     memset(execlists->pending, 0, sizeof(execlists->pending));
>  
> -             i915_request_put(rq);
> -
> -             memset(port, 0, sizeof(*port));
> -             port++;
> -     }
> -
> -     execlists_clear_all_active(execlists);
> +     for (port = execlists->active; (rq = *port); port++)
> +             execlists_schedule_out(rq);
> +     execlists->active =
> +             memset(execlists->inflight, 0, sizeof(execlists->inflight));
>  }
>  
>  static inline void
> @@ -1163,7 +1160,6 @@ reset_in_progress(const struct intel_engine_execlists 
> *execlists)
>  static void process_csb(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlists * const execlists = &engine->execlists;
> -     struct execlist_port *port = execlists->port;
>       const u32 * const buf = execlists->csb_status;
>       const u8 num_entries = execlists->csb_size;
>       u8 head, tail;
> @@ -1198,9 +1194,7 @@ static void process_csb(struct intel_engine_cs *engine)
>       rmb();
>  
>       do {
> -             struct i915_request *rq;
>               unsigned int status;
> -             unsigned int count;
>  
>               if (++head == num_entries)
>                       head = 0;
> @@ -1223,68 +1217,37 @@ static void process_csb(struct intel_engine_cs 
> *engine)
>                * status notifier.
>                */
>  
> -             GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> +             GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
>                         engine->name, head,
> -                       buf[2 * head + 0], buf[2 * head + 1],
> -                       execlists->active);
> +                       buf[2 * head + 0], buf[2 * head + 1]);
>  
>               status = buf[2 * head];
> -             if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -                           GEN8_CTX_STATUS_PREEMPTED))
> -                     execlists_set_active(execlists,
> -                                          EXECLISTS_ACTIVE_HWACK);
> -             if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> -                     execlists_clear_active(execlists,
> -                                            EXECLISTS_ACTIVE_HWACK);
> -
> -             if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> -                     continue;
> +             if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
> +promote:
> +                     GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
> +                     execlists->active =
> +                             memcpy(execlists->inflight,
> +                                    execlists->pending,
> +                                    execlists_num_ports(execlists) *
> +                                    sizeof(*execlists->pending));
> +                     execlists->pending[0] = NULL;

I can't decide whether a comment or a helper inline function would
serve better as documentation of the movement between pending and
inflight.

I guess it is better left as future work, after the dust settles.

Just a general yearning for the same level of documented steps
as in dequeue.
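
If going the helper route, even something as small as (hypothetical
name, body lifted straight from the hunk above):

static inline void
execlists_promote_pending(struct intel_engine_execlists *execlists)
{
        /* On the CS ack, the pending[] block becomes the inflight[] block */
        execlists->active =
                memcpy(execlists->inflight,
                       execlists->pending,
                       execlists_num_ports(execlists) *
                       sizeof(*execlists->pending));
        execlists->pending[0] = NULL;
}

would at least give the transition a name to grep for.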

>  
> -             /* We should never get a COMPLETED | IDLE_ACTIVE! */
> -             GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);

Is our assert coverage going to suffer?

> +                     if (!inject_preempt_hang(execlists))
> +                             ring_pause(engine) = 0;
> +             } else if (status & GEN8_CTX_STATUS_PREEMPTED) {
> +                     struct i915_request * const *port = execlists->active;
>  
> -             if (status & GEN8_CTX_STATUS_COMPLETE &&
> -                 buf[2*head + 1] == execlists->preempt_complete_status) {
> -                     GEM_TRACE("%s preempt-idle\n", engine->name);
> -                     complete_preempt_context(execlists);
> -                     continue;
> -             }
> -
> -             if (status & GEN8_CTX_STATUS_PREEMPTED &&
> -                 execlists_is_active(execlists,
> -                                     EXECLISTS_ACTIVE_PREEMPT))
> -                     continue;
> +                     trace_ports(execlists, "preempted", execlists->active);
>  
> -             GEM_BUG_ON(!execlists_is_active(execlists,
> -                                             EXECLISTS_ACTIVE_USER));
> +                     while (*port)
> +                             execlists_schedule_out(*port++);
>  
> -             rq = port_unpack(port, &count);
> -             GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), 
> prio=%d\n",
> -                       engine->name,
> -                       port->context_id, count,
> -                       rq ? rq->fence.context : 0,
> -                       rq ? rq->fence.seqno : 0,
> -                       rq ? hwsp_seqno(rq) : 0,
> -                       rq ? rq_prio(rq) : 0);
> +                     goto promote;
> +             } else if (*execlists->active) {
> +                     struct i915_request *rq = *execlists->active++;
>  
> -             /* Check the context/desc id for this event matches */
> -             GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> -
> -             GEM_BUG_ON(count == 0);
> -             if (--count == 0) {
> -                     /*
> -                      * On the final event corresponding to the
> -                      * submission of this context, we expect either
> -                      * an element-switch event or a completion
> -                      * event (and on completion, the active-idle
> -                      * marker). No more preemptions, lite-restore
> -                      * or otherwise.
> -                      */
> -                     GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> -                     GEM_BUG_ON(port_isset(&port[1]) &&
> -                                !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> -                     GEM_BUG_ON(!port_isset(&port[1]) &&
> -                                !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> +                     trace_ports(execlists, "completed",
> +                                 execlists->active - 1);
>  
>                       /*
>                        * We rely on the hardware being strongly
> @@ -1293,21 +1256,10 @@ static void process_csb(struct intel_engine_cs 
> *engine)
>                        * user interrupt and CSB is processed.
>                        */
>                       GEM_BUG_ON(!i915_request_completed(rq));
> +                     execlists_schedule_out(rq);
>  
> -                     execlists_context_schedule_out(rq,
> -                                                    
> INTEL_CONTEXT_SCHEDULE_OUT);
> -                     i915_request_put(rq);
> -
> -                     GEM_TRACE("%s completed ctx=%d\n",
> -                               engine->name, port->context_id);
> -
> -                     port = execlists_port_complete(execlists, port);
> -                     if (port_isset(port))
> -                             execlists_user_begin(execlists, port);
> -                     else
> -                             execlists_user_end(execlists);
> -             } else {
> -                     port_set(port, port_pack(rq, count));
> +                     GEM_BUG_ON(execlists->active - execlists->inflight >
> +                                execlists_num_ports(execlists));
>               }
>       } while (head != tail);
>  
> @@ -1332,7 +1284,7 @@ static void __execlists_submission_tasklet(struct 
> intel_engine_cs *const engine)
>       lockdep_assert_held(&engine->active.lock);
>  
>       process_csb(engine);
> -     if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> +     if (!engine->execlists.pending[0])
>               execlists_dequeue(engine);
>  }
>  
> @@ -1345,11 +1297,6 @@ static void execlists_submission_tasklet(unsigned long 
> data)
>       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>       unsigned long flags;
>  
> -     GEM_TRACE("%s awake?=%d, active=%x\n",
> -               engine->name,
> -               !!intel_wakeref_active(&engine->wakeref),
> -               engine->execlists.active);
> -
>       spin_lock_irqsave(&engine->active.lock, flags);
>       __execlists_submission_tasklet(engine);
>       spin_unlock_irqrestore(&engine->active.lock, flags);
> @@ -1376,12 +1323,16 @@ static void __submit_queue_imm(struct intel_engine_cs 
> *engine)
>               tasklet_hi_schedule(&execlists->tasklet);
>  }
>  
> -static void submit_queue(struct intel_engine_cs *engine, int prio)
> +static void submit_queue(struct intel_engine_cs *engine,
> +                      const struct i915_request *rq)
>  {
> -     if (prio > engine->execlists.queue_priority_hint) {
> -             engine->execlists.queue_priority_hint = prio;
> -             __submit_queue_imm(engine);
> -     }
> +     struct intel_engine_execlists *execlists = &engine->execlists;
> +
> +     if (rq_prio(rq) <= execlists->queue_priority_hint)
> +             return;
> +
> +     execlists->queue_priority_hint = rq_prio(rq);
> +     __submit_queue_imm(engine);
>  }
>  
>  static void execlists_submit_request(struct i915_request *request)
> @@ -1397,7 +1348,7 @@ static void execlists_submit_request(struct 
> i915_request *request)
>       GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>       GEM_BUG_ON(list_empty(&request->sched.link));
>  
> -     submit_queue(engine, rq_prio(request));
> +     submit_queue(engine, request);
>  
>       spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
> @@ -2048,27 +1999,13 @@ static void execlists_reset_prepare(struct 
> intel_engine_cs *engine)
>       spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
> -static bool lrc_regs_ok(const struct i915_request *rq)
> -{
> -     const struct intel_ring *ring = rq->ring;
> -     const u32 *regs = rq->hw_context->lrc_reg_state;
> -
> -     /* Quick spot check for the common signs of context corruption */
> -
> -     if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
> -         (RING_CTL_SIZE(ring->size) | RING_VALID))
> -             return false;
> -
> -     if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
> -             return false;
> -
> -     return true;
> -}
> -
> -static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> +static void reset_csb_pointers(struct intel_engine_cs *engine)
>  {
> +     struct intel_engine_execlists * const execlists = &engine->execlists;
>       const unsigned int reset_value = execlists->csb_size - 1;
>  
> +     ring_pause(engine) = 0;
> +
>       /*
>        * After a reset, the HW starts writing into CSB entry [0]. We
>        * therefore have to set our HEAD pointer back one entry so that
> @@ -2115,18 +2052,21 @@ static void __execlists_reset(struct intel_engine_cs 
> *engine, bool stalled)
>       process_csb(engine); /* drain preemption events */
>  
>       /* Following the reset, we need to reload the CSB read/write pointers */
> -     reset_csb_pointers(&engine->execlists);
> +     reset_csb_pointers(engine);
>  
>       /*
>        * Save the currently executing context, even if we completed
>        * its request, it was still running at the time of the
>        * reset and will have been clobbered.
>        */
> -     if (!port_isset(execlists->port))
> -             goto out_clear;
> +     rq = execlists_active(execlists);
> +     if (!rq)
> +             return;
>  
> -     rq = port_request(execlists->port);
>       ce = rq->hw_context;
> +     GEM_BUG_ON(i915_active_is_idle(&ce->active));
> +     GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
> +     rq = active_request(rq);
>  
>       /*
>        * Catch up with any missed context-switch interrupts.
> @@ -2139,9 +2079,12 @@ static void __execlists_reset(struct intel_engine_cs 
> *engine, bool stalled)
>        */
>       execlists_cancel_port_requests(execlists);
>  
> -     rq = active_request(rq);
> -     if (!rq)
> +     if (!rq) {
> +             ce->ring->head = ce->ring->tail;
>               goto out_replay;
> +     }
> +
> +     ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
>  
>       /*
>        * If this request hasn't started yet, e.g. it is waiting on a
> @@ -2155,7 +2098,7 @@ static void __execlists_reset(struct intel_engine_cs 
> *engine, bool stalled)
>        * Otherwise, if we have not started yet, the request should replay
>        * perfectly and we do not need to flag the result as being erroneous.
>        */
> -     if (!i915_request_started(rq) && lrc_regs_ok(rq))
> +     if (!i915_request_started(rq))
>               goto out_replay;
>  
>       /*
> @@ -2170,7 +2113,7 @@ static void __execlists_reset(struct intel_engine_cs 
> *engine, bool stalled)
>        * image back to the expected values to skip over the guilty request.
>        */
>       i915_reset_request(rq, stalled);
> -     if (!stalled && lrc_regs_ok(rq))
> +     if (!stalled)
>               goto out_replay;
>  
>       /*
> @@ -2190,17 +2133,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>       execlists_init_reg_state(regs, ce, engine, ce->ring);
>  
>  out_replay:
> -     /* Rerun the request; its payload has been neutered (if guilty). */
> -     ce->ring->head =
> -             rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
> +     GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
> +               engine->name, ce->ring->head, ce->ring->tail);
>       intel_ring_update_space(ce->ring);
>       __execlists_update_reg_state(ce, engine);
>  
>       /* Push back any incomplete requests for replay after the reset. */
>       __unwind_incomplete_requests(engine);
> -
> -out_clear:
> -     execlists_clear_all_active(execlists);
>  }
>  
>  static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
> @@ -2296,7 +2235,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  
>       execlists->queue_priority_hint = INT_MIN;
>       execlists->queue = RB_ROOT_CACHED;
> -     GEM_BUG_ON(port_isset(execlists->port));
>  
>       GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
>       execlists->tasklet.func = nop_submission_tasklet;
> @@ -2514,15 +2452,29 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
>       return cs;
>  }
>  
> +static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
> +{
> +     *cs++ = MI_SEMAPHORE_WAIT |
> +             MI_SEMAPHORE_GLOBAL_GTT |
> +             MI_SEMAPHORE_POLL |
> +             MI_SEMAPHORE_SAD_EQ_SDD;
> +     *cs++ = 0;
> +     *cs++ = intel_hws_preempt_address(request->engine);
> +     *cs++ = 0;
> +
> +     return cs;
> +}
> +
>  static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>  {
>       cs = gen8_emit_ggtt_write(cs,
>                                 request->fence.seqno,
>                                 request->timeline->hwsp_offset,
>                                 0);
> -
>       *cs++ = MI_USER_INTERRUPT;
> +
>       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

This was discussed on IRC; it could warrant a comment here explaining
why this is needed. Precious info.
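
Something along these lines, perhaps; the wording is only my reading of
why the semaphore is there, not whatever was settled on IRC:

	*cs++ = MI_USER_INTERRUPT;

	/*
	 * Re-enable arbitration and then park on the preemption semaphore.
	 * While a preempt-to-busy cycle is in progress the driver holds the
	 * HWS preempt slot non-zero, so this busywait keeps the context
	 * from completing past its breadcrumb in the meantime and we never
	 * have to rewind RING_TAIL past RING_HEAD. Turning arbitration back
	 * on first keeps the busywait itself a valid preemption point.
	 */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	cs = emit_preempt_busywait(request, cs);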

> +     cs = emit_preempt_busywait(request, cs);
>  
>       request->tail = intel_ring_offset(request, cs);
>       assert_ring_tail_valid(request->ring, request->tail);
> @@ -2543,9 +2495,10 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>                                   PIPE_CONTROL_FLUSH_ENABLE |
>                                   PIPE_CONTROL_CS_STALL,
>                                   0);
> -
>       *cs++ = MI_USER_INTERRUPT;
> +
>       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

The comment could be copied here, or a common helper created for the shared parts.
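
For example (the helper name below is just a placeholder, and I am assuming
both emitters still finish by calling gen8_emit_wa_tail() as before):

	static u32 *gen8_emit_fini_breadcrumb_tail(struct i915_request *request,
						   u32 *cs)
	{
		/* Common tail: user interrupt, arbitration on, preempt busywait */
		*cs++ = MI_USER_INTERRUPT;

		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
		cs = emit_preempt_busywait(request, cs);

		request->tail = intel_ring_offset(request, cs);
		assert_ring_tail_valid(request->ring, request->tail);

		return gen8_emit_wa_tail(request, cs);
	}

with gen8_emit_fini_breadcrumb() and gen8_emit_fini_breadcrumb_rcs() each
reduced to their ggtt write / pipe control plus a tail call to it.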

-Mika

> +     cs = emit_preempt_busywait(request, cs);
>  
>       request->tail = intel_ring_offset(request, cs);
>       assert_ring_tail_valid(request->ring, request->tail);
> @@ -2594,8 +2547,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>       engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>       if (!intel_vgpu_active(engine->i915))
>               engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> -     if (engine->preempt_context &&
> -         HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> +     if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
>               engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>  }
>  
> @@ -2718,11 +2670,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
>                       i915_mmio_reg_offset(RING_ELSP(base));
>       }
>  
> -     execlists->preempt_complete_status = ~0u;
> -     if (engine->preempt_context)
> -             execlists->preempt_complete_status =
> -                     upper_32_bits(engine->preempt_context->lrc_desc);
> -
>       execlists->csb_status =
>               &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
>  
> @@ -2734,7 +2681,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
>       else
>               execlists->csb_size = GEN11_CSB_ENTRIES;
>  
> -     reset_csb_pointers(execlists);
> +     reset_csb_pointers(engine);
>  
>       return 0;
>  }
> @@ -2917,11 +2864,6 @@ populate_lr_context(struct intel_context *ce,
>       if (!engine->default_state)
>               regs[CTX_CONTEXT_CONTROL + 1] |=
>                       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
> -     if (ce->gem_context == engine->i915->preempt_context &&
> -         INTEL_GEN(engine->i915) < 11)
> -             regs[CTX_CONTEXT_CONTROL + 1] |=
> -                     _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> -                                        CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
>  
>       ret = 0;
>  err_unpin_ctx:
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b7e9fddef270..a497cf7acb6a 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1248,10 +1248,10 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
>       }
>  }
>  
> -static void record_request(struct i915_request *request,
> +static void record_request(const struct i915_request *request,
>                          struct drm_i915_error_request *erq)
>  {
> -     struct i915_gem_context *ctx = request->gem_context;
> +     const struct i915_gem_context *ctx = request->gem_context;
>  
>       erq->flags = request->fence.flags;
>       erq->context = request->fence.context;
> @@ -1315,20 +1315,15 @@ static void engine_record_requests(struct intel_engine_cs *engine,
>       ee->num_requests = count;
>  }
>  
> -static void error_record_engine_execlists(struct intel_engine_cs *engine,
> +static void error_record_engine_execlists(const struct intel_engine_cs *engine,
>                                         struct drm_i915_error_engine *ee)
>  {
>       const struct intel_engine_execlists * const execlists = &engine->execlists;
> -     unsigned int n;
> +     struct i915_request * const *port = execlists->active;
> +     unsigned int n = 0;
>  
> -     for (n = 0; n < execlists_num_ports(execlists); n++) {
> -             struct i915_request *rq = port_request(&execlists->port[n]);
> -
> -             if (!rq)
> -                     break;
> -
> -             record_request(rq, &ee->execlist[n]);
> -     }
> +     while (*port)
> +             record_request(*port++, &ee->execlist[n++]);
>  
>       ee->num_ports = n;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 7083e6ab92c5..0c99694faab7 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -276,6 +276,12 @@ static bool i915_request_retire(struct i915_request *rq)
>  
>       local_irq_disable();
>  
> +     /*
> +      * We only loosely track inflight requests across preemption,
> +      * and so we may find ourselves attempting to retire a _completed_
> +      * request that we have removed from the HW and put back on a run
> +      * queue.
> +      */
>       spin_lock(&rq->engine->active.lock);
>       list_del(&rq->sched.link);
>       spin_unlock(&rq->engine->active.lock);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index edbbdfec24ab..bebc1e9b4a5e 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -28,6 +28,7 @@
>  #include <linux/dma-fence.h>
>  #include <linux/lockdep.h>
>  
> +#include "gt/intel_context_types.h"
>  #include "gt/intel_engine_types.h"
>  
>  #include "i915_gem.h"
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 2e9b38bdc33c..b1ba3e65cd52 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -179,8 +179,7 @@ static inline int rq_prio(const struct i915_request *rq)
>  
>  static void kick_submission(struct intel_engine_cs *engine, int prio)
>  {
> -     const struct i915_request *inflight =
> -             port_request(engine->execlists.port);
> +     const struct i915_request *inflight = *engine->execlists.active;
>  
>       /*
>        * If we are already the currently executing context, don't
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 2987219a6300..4920ff9aba62 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -131,6 +131,18 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
>       ((typeof(ptr))((unsigned long)(ptr) | __bits));                 \
>  })
>  
> +#define ptr_count_dec(p_ptr) do {                                    \
> +     typeof(p_ptr) __p = (p_ptr);                                    \
> +     unsigned long __v = (unsigned long)(*__p);                      \
> +     *__p = (typeof(*p_ptr))(--__v);                                 \
> +} while (0)
> +
> +#define ptr_count_inc(p_ptr) do {                                    \
> +     typeof(p_ptr) __p = (p_ptr);                                    \
> +     unsigned long __v = (unsigned long)(*__p);                      \
> +     *__p = (typeof(*p_ptr))(++__v);                                 \
> +} while (0)
> +
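
A short note on intended use might help future readers here: these appear
to back the intel_context_inflight_*() helpers used by the new
schedule_in() and schedule_out() further down, packing a small submission
count into the otherwise unused low bits of the ce->inflight engine
pointer. Roughly like below; the helper names come from the hunks further
down, but their bodies are not quoted in this excerpt, so treat the bodies
(and the choice of 2 low bits) as my guess at the pattern:

	/* ce->inflight: engine pointer with a submission count in its low bits */
	#define intel_context_inflight_count(ce) \
		ptr_unmask_bits((ce)->inflight, 2)
	#define intel_context_inflight_inc(ce) \
		ptr_count_inc(&(ce)->inflight)
	#define intel_context_inflight_dec(ce) \
		ptr_count_dec(&(ce)->inflight)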
>  #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
>  #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
>  #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index db531ebc7704..12c22359fdac 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -32,7 +32,11 @@
>  #include "intel_guc_submission.h"
>  #include "i915_drv.h"
>  
> -#define GUC_PREEMPT_FINISHED         0x1
> +enum {
> +     GUC_PREEMPT_NONE = 0,
> +     GUC_PREEMPT_INPROGRESS,
> +     GUC_PREEMPT_FINISHED,
> +};
>  #define GUC_PREEMPT_BREADCRUMB_DWORDS        0x8
>  #define GUC_PREEMPT_BREADCRUMB_BYTES \
>       (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
> @@ -537,15 +541,11 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
>       u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
>       u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
>  
> -     spin_lock(&client->wq_lock);
> -
>       guc_wq_item_append(client, engine->guc_id, ctx_desc,
>                          ring_tail, rq->fence.seqno);
>       guc_ring_doorbell(client);
>  
>       client->submissions[engine->id] += 1;
> -
> -     spin_unlock(&client->wq_lock);
>  }
>  
>  /*
> @@ -631,8 +631,9 @@ static void inject_preempt_context(struct work_struct *work)
>       data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
>  
>       if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
> -             execlists_clear_active(&engine->execlists,
> -                                    EXECLISTS_ACTIVE_PREEMPT);
> +             intel_write_status_page(engine,
> +                                     I915_GEM_HWS_PREEMPT,
> +                                     GUC_PREEMPT_NONE);
>               tasklet_schedule(&engine->execlists.tasklet);
>       }
>  
> @@ -672,8 +673,6 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlists *execlists = &engine->execlists;
>  
> -     GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
> -
>       if (inject_preempt_hang(execlists))
>               return;
>  
> @@ -681,89 +680,90 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
>       execlists_unwind_incomplete_requests(execlists);
>  
>       wait_for_guc_preempt_report(engine);
> -     intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
> +     intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, GUC_PREEMPT_NONE);
>  }
>  
> -/**
> - * guc_submit() - Submit commands through GuC
> - * @engine: engine associated with the commands
> - *
> - * The only error here arises if the doorbell hardware isn't functioning
> - * as expected, which really shouln't happen.
> - */
> -static void guc_submit(struct intel_engine_cs *engine)
> +static void guc_submit(struct intel_engine_cs *engine,
> +                    struct i915_request **out,
> +                    struct i915_request **end)
>  {
>       struct intel_guc *guc = &engine->i915->guc;
> -     struct intel_engine_execlists * const execlists = &engine->execlists;
> -     struct execlist_port *port = execlists->port;
> -     unsigned int n;
> +     struct intel_guc_client *client = guc->execbuf_client;
>  
> -     for (n = 0; n < execlists_num_ports(execlists); n++) {
> -             struct i915_request *rq;
> -             unsigned int count;
> +     spin_lock(&client->wq_lock);
>  
> -             rq = port_unpack(&port[n], &count);
> -             if (rq && count == 0) {
> -                     port_set(&port[n], port_pack(rq, ++count));
> +     do {
> +             struct i915_request *rq = *out++;
>  
> -                     flush_ggtt_writes(rq->ring->vma);
> +             flush_ggtt_writes(rq->ring->vma);
> +             guc_add_request(guc, rq);
> +     } while (out != end);
>  
> -                     guc_add_request(guc, rq);
> -             }
> -     }
> +     spin_unlock(&client->wq_lock);
>  }
>  
> -static void port_assign(struct execlist_port *port, struct i915_request *rq)
> +static inline int rq_prio(const struct i915_request *rq)
>  {
> -     GEM_BUG_ON(port_isset(port));
> -
> -     port_set(port, i915_request_get(rq));
> +     return rq->sched.attr.priority | __NO_PREEMPTION;
>  }
>  
> -static inline int rq_prio(const struct i915_request *rq)
> +static struct i915_request *schedule_in(struct i915_request *rq, int idx)
>  {
> -     return rq->sched.attr.priority;
> +     trace_i915_request_in(rq, idx);
> +
> +     if (!rq->hw_context->inflight)
> +             rq->hw_context->inflight = rq->engine;
> +     intel_context_inflight_inc(rq->hw_context);
> +
> +     return i915_request_get(rq);
>  }
>  
> -static inline int port_prio(const struct execlist_port *port)
> +static void schedule_out(struct i915_request *rq)
>  {
> -     return rq_prio(port_request(port)) | __NO_PREEMPTION;
> +     trace_i915_request_out(rq);
> +
> +     intel_context_inflight_dec(rq->hw_context);
> +     if (!intel_context_inflight_count(rq->hw_context))
> +             rq->hw_context->inflight = NULL;
> +
> +     i915_request_put(rq);
>  }
>  
> -static bool __guc_dequeue(struct intel_engine_cs *engine)
> +static void __guc_dequeue(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlists * const execlists = &engine->execlists;
> -     struct execlist_port *port = execlists->port;
> -     struct i915_request *last = NULL;
> -     const struct execlist_port * const last_port =
> -             &execlists->port[execlists->port_mask];
> +     struct i915_request **first = execlists->inflight;
> +     struct i915_request ** const last_port = first + execlists->port_mask;
> +     struct i915_request *last = first[0];
> +     struct i915_request **port;
>       bool submit = false;
>       struct rb_node *rb;
>  
>       lockdep_assert_held(&engine->active.lock);
>  
> -     if (port_isset(port)) {
> +     if (last) {
>               if (intel_engine_has_preemption(engine)) {
>                       struct guc_preempt_work *preempt_work =
>                               &engine->i915->guc.preempt_work[engine->id];
>                       int prio = execlists->queue_priority_hint;
>  
> -                     if (i915_scheduler_need_preempt(prio,
> -                                                     port_prio(port))) {
> -                             execlists_set_active(execlists,
> -                                                  EXECLISTS_ACTIVE_PREEMPT);
> +                     if (i915_scheduler_need_preempt(prio, rq_prio(last))) {
> +                             intel_write_status_page(engine,
> +                                                     I915_GEM_HWS_PREEMPT,
> +                                                     GUC_PREEMPT_INPROGRESS);
>                               queue_work(engine->i915->guc.preempt_wq,
>                                          &preempt_work->work);
> -                             return false;
> +                             return;
>                       }
>               }
>  
> -             port++;
> -             if (port_isset(port))
> -                     return false;
> +             if (*++first)
> +                     return;
> +
> +             last = NULL;
>       }
> -     GEM_BUG_ON(port_isset(port));
>  
> +     port = first;
>       while ((rb = rb_first_cached(&execlists->queue))) {
>               struct i915_priolist *p = to_priolist(rb);
>               struct i915_request *rq, *rn;
> @@ -774,18 +774,15 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>                               if (port == last_port)
>                                       goto done;
>  
> -                             if (submit)
> -                                     port_assign(port, last);
> +                             *port = schedule_in(last,
> +                                                 port - execlists->inflight);
>                               port++;
>                       }
>  
>                       list_del_init(&rq->sched.link);
> -
>                       __i915_request_submit(rq);
> -                     trace_i915_request_in(rq, port_index(port, execlists));
> -
> -                     last = rq;
>                       submit = true;
> +                     last = rq;
>               }
>  
>               rb_erase_cached(&p->node, &execlists->queue);
> @@ -794,58 +791,41 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>  done:
>       execlists->queue_priority_hint =
>               rb ? to_priolist(rb)->priority : INT_MIN;
> -     if (submit)
> -             port_assign(port, last);
> -     if (last)
> -             execlists_user_begin(execlists, execlists->port);
> -
> -     /* We must always keep the beast fed if we have work piled up */
> -     GEM_BUG_ON(port_isset(execlists->port) &&
> -                !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
> -     GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
> -                !port_isset(execlists->port));
> -
> -     return submit;
> -}
> -
> -static void guc_dequeue(struct intel_engine_cs *engine)
> -{
> -     if (__guc_dequeue(engine))
> -             guc_submit(engine);
> +     if (submit) {
> +             *port = schedule_in(last, port - execlists->inflight);
> +             *++port = NULL;
> +             guc_submit(engine, first, port);
> +     }
> +     execlists->active = execlists->inflight;
>  }
>  
>  static void guc_submission_tasklet(unsigned long data)
>  {
>       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>       struct intel_engine_execlists * const execlists = &engine->execlists;
> -     struct execlist_port *port = execlists->port;
> -     struct i915_request *rq;
> +     struct i915_request **port, *rq;
>       unsigned long flags;
>  
>       spin_lock_irqsave(&engine->active.lock, flags);
>  
> -     rq = port_request(port);
> -     while (rq && i915_request_completed(rq)) {
> -             trace_i915_request_out(rq);
> -             i915_request_put(rq);
> +     for (port = execlists->inflight; (rq = *port); port++) {
> +             if (!i915_request_completed(rq))
> +                     break;
>  
> -             port = execlists_port_complete(execlists, port);
> -             if (port_isset(port)) {
> -                     execlists_user_begin(execlists, port);
> -                     rq = port_request(port);
> -             } else {
> -                     execlists_user_end(execlists);
> -                     rq = NULL;
> -             }
> +             schedule_out(rq);
> +     }
> +     if (port != execlists->inflight) {
> +             int idx = port - execlists->inflight;
> +             int rem = ARRAY_SIZE(execlists->inflight) - idx;
> +             memmove(execlists->inflight, port, rem * sizeof(*port));
>       }
>  
> -     if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
> -         intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
> +     if (intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
>           GUC_PREEMPT_FINISHED)
>               complete_preempt_context(engine);
>  
> -     if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> -             guc_dequeue(engine);
> +     if (!intel_read_status_page(engine, I915_GEM_HWS_PREEMPT))
> +             __guc_dequeue(engine);
>  
>       spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
> @@ -959,7 +939,6 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
>  
>       execlists->queue_priority_hint = INT_MIN;
>       execlists->queue = RB_ROOT_CACHED;
> -     GEM_BUG_ON(port_isset(execlists->port));
>  
>       spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
> @@ -1422,7 +1401,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>        * and it is guaranteed that it will remove the work item from the
>        * queue before our request is completed.
>        */
> -     BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) *
> +     BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
>                    sizeof(struct guc_wq_item) *
>                    I915_NUM_ENGINES > GUC_WQ_SIZE);
>  
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 298bb7116c51..1a5b9e284ca9 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -366,13 +366,15 @@ static int __igt_breadcrumbs_smoketest(void *arg)
>  
>               if (!wait_event_timeout(wait->wait,
>                                       i915_sw_fence_done(wait),
> -                                     HZ / 2)) {
> +                                     5 * HZ)) {
>                       struct i915_request *rq = requests[count - 1];
>  
> -                     pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
> -                            count,
> +                     pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
> +                            atomic_read(&wait->pending), count,
>                              rq->fence.context, rq->fence.seqno,
>                              t->engine->name);
> +                     GEM_TRACE_DUMP();
> +
>                       i915_gem_set_wedged(t->engine->i915);
>                       GEM_BUG_ON(!i915_request_completed(rq));
>                       i915_sw_fence_wait(wait);
> -- 
> 2.20.1
>