Thanks Chris.

This is Xiao from the GVT-g team. GVT-g has been waiting for exactly this patch for when GuC submission is enabled on the host/native side. GVT-g requires single submission to the GuC firmware (only one request in the GuC FW at any time) because we need to do additional setup/cleanup around each VM switch.
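For illustration only, the kind of restriction we have in mind would sit in the i915_guc_dequeue() loop quoted below; note that using intel_gvt_active() as the host-side check is our assumption, not something in this patch:

		/* Sketch only (not part of this patch): stop after the
		 * first request while GVT-g is active, so that at most
		 * one request is in the GuC FW at any time and we get a
		 * clean point for VM-switch setup/cleanup.
		 */
		i915_guc_submit(cursor);
		last = cursor;
		submit = true;
		if (intel_gvt_active(engine->i915))
			break;	/* single submission for GVT-g */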
One thing worth mentioning: we need an 'execlists_context_status_change' notification before i915_guc_submit(), just as we do for the ELSP port write. We can submit another patch for that on top of this one; a rough sketch follows.
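Something along these lines, assuming execlists_context_status_change() (currently static in intel_lrc.c) were made visible to the GuC submission code:

	static void i915_guc_submit(struct drm_i915_gem_request *rq)
	{
		__i915_gem_request_submit(rq);
		/* Sketch: notify GVT-g before the request reaches the GuC,
		 * mirroring the SCHEDULE_IN notification on the ELSP write.
		 */
		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
		__i915_guc_submit(rq);
	}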
One question: when will this patch be merged upstream?

> -----Original Message-----
> From: Intel-gfx [mailto:[email protected]] On Behalf
> Of Chris Wilson
> Sent: Thursday, February 2, 2017 6:36 PM
> To: [email protected]
> Subject: [Intel-gfx] [PATCH v8] drm/i915/scheduler: emulate a scheduler for
> guc
> 
> This emulates execlists on top of the GuC in order to defer submission of
> requests to the hardware. This deferral allows time for high priority requests
> to gazump their way to the head of the queue, however it nerfs the GuC by
> converting it back into a simple execlist (where the CPU has to wake up after
> every request to feed new commands into the GuC).
> 
> v2: Drop hack status - though iirc there is still a lockdep inversion between
> fence and engine->timeline->lock (which is impossible as the nesting only
> occurs on different fences - hopefully just requires some judicious lockdep
> annotation)
> v3: Apply lockdep nesting to enabling signaling on the request, using the
> pattern we already have in __i915_gem_request_submit();
> v4: Replaying requests after a hang also now needs the timeline spinlock, to
> disable the interrupts at least
> v5: Hold wq lock for completeness, and emit a tracepoint for enabling signal
> v6: Reorder interrupt checking for a happier gcc.
> v7: Only signal the tasklet after a user-interrupt if using guc scheduling
> v8: Restore lost update of rq through the i915_guc_irq_handler (Tvrtko)
> 
> Signed-off-by: Chris Wilson <[email protected]>
> Cc: Tvrtko Ursulin <[email protected]>
> Reviewed-by: Tvrtko Ursulin <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 110 +++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_irq.c            |  13 +++-
>  drivers/gpu/drm/i915/intel_lrc.c           |   5 +-
>  3 files changed, 114 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 8ced9e26f075..d99185aafe58 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -25,6 +25,8 @@
>  #include "i915_drv.h"
>  #include "intel_uc.h"
> 
> +#include <trace/events/dma_fence.h>
> +
>  /**
>   * DOC: GuC-based command submission
>   *
> @@ -348,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>       u32 freespace;
>       int ret;
> 
> -     spin_lock(&client->wq_lock);
> +     spin_lock_irq(&client->wq_lock);
>       freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
>       freespace -= client->wq_rsvd;
>       if (likely(freespace >= wqi_size)) {
> @@ -358,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>               client->no_wq_space++;
>               ret = -EAGAIN;
>       }
> -     spin_unlock(&client->wq_lock);
> +     spin_unlock_irq(&client->wq_lock);
> 
>       return ret;
>  }
> @@ -370,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
> 
>       GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
> 
> -     spin_lock(&client->wq_lock);
> +     spin_lock_irq(&client->wq_lock);
>       client->wq_rsvd -= wqi_size;
> -     spin_unlock(&client->wq_lock);
> +     spin_unlock_irq(&client->wq_lock);
>  }
> 
>  /* Construct a Work Item and append it to the GuC's Work Queue */
> @@ -532,10 +534,97 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
> 
>  static void i915_guc_submit(struct drm_i915_gem_request *rq)
>  {
> -     i915_gem_request_submit(rq);
> +     __i915_gem_request_submit(rq);
>       __i915_guc_submit(rq);
>  }
> 
> +static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> +{
> +     /* If we use dma_fence_enable_sw_signaling() directly, lockdep
> +      * detects an ordering issue between the fence lockclass and the
> +      * global_timeline. This circular dependency can only occur via 2
> +      * different fences (but same fence lockclass), so we use the nesting
> +      * annotation here to prevent the warn, equivalent to the nesting
> +      * inside i915_gem_request_submit() for when we also enable the
> +      * signaler.
> +      */
> +
> +     if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +                          &rq->fence.flags))
> +             return;
> +
> +     GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
> +     trace_dma_fence_enable_signal(&rq->fence);
> +
> +     spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> +     intel_engine_enable_signaling(rq);
> +     spin_unlock(&rq->lock);
> +}
> +
> +static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> +{
> +     struct execlist_port *port = engine->execlist_port;
> +     struct drm_i915_gem_request *last = port[0].request;
> +     unsigned long flags;
> +     struct rb_node *rb;
> +     bool submit = false;
> +
> +     spin_lock_irqsave(&engine->timeline->lock, flags);
> +     rb = engine->execlist_first;
> +     while (rb) {
> +             struct drm_i915_gem_request *cursor =
> +                     rb_entry(rb, typeof(*cursor), priotree.node);
> +
> +             if (last && cursor->ctx != last->ctx) {
> +                     if (port != engine->execlist_port)
> +                             break;
> +
> +                     i915_gem_request_assign(&port->request, last);
> +                     nested_enable_signaling(last);
> +                     port++;
> +             }
> +
> +             rb = rb_next(rb);
> +             rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> +             RB_CLEAR_NODE(&cursor->priotree.node);
> +             cursor->priotree.priority = INT_MAX;
> +
> +             i915_guc_submit(cursor);
> +             last = cursor;
> +             submit = true;
> +     }
> +     if (submit) {
> +             i915_gem_request_assign(&port->request, last);
> +             nested_enable_signaling(last);
> +             engine->execlist_first = rb;
> +     }
> +     spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +
> +     return submit;
> +}
> +
> +static void i915_guc_irq_handler(unsigned long data)
> +{
> +     struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> +     struct execlist_port *port = engine->execlist_port;
> +     struct drm_i915_gem_request *rq;
> +     bool submit;
> +
> +     do {
> +             rq = port[0].request;
> +             while (rq && i915_gem_request_completed(rq)) {
> +                     i915_gem_request_put(rq);
> +                     port[0].request = port[1].request;
> +                     port[1].request = NULL;
> +                     rq = port[0].request;
> +             }
> +
> +             submit = false;
> +             if (!port[1].request)
> +                     submit = i915_guc_dequeue(engine);
> +     } while (submit);
> +}
> +
>  /*
>   * Everything below here is concerned with setup & teardown, and is
>   * therefore not part of the somewhat time-critical batch-submission
> @@ -944,15 +1033,22 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>       /* Take over from manual control of ELSP (execlists) */
>       for_each_engine(engine, dev_priv, id) {
>               struct drm_i915_gem_request *rq;
> +             unsigned long flags;
> 
> -             engine->submit_request = i915_guc_submit;
> -             engine->schedule = NULL;
> +             tasklet_init(&engine->irq_tasklet,
> +                          i915_guc_irq_handler,
> +                          (unsigned long)engine);
> 
>               /* Replay the current set of previously submitted requests */
> +             spin_lock_irqsave(&engine->timeline->lock, flags);
>               list_for_each_entry(rq, &engine->timeline->requests, link) {
> +                     spin_lock(&client->wq_lock);
>                       client->wq_rsvd += sizeof(struct guc_wq_item);
> +                     spin_unlock(&client->wq_lock);
> +
>                       __i915_guc_submit(rq);
>               }
> +             spin_unlock_irqrestore(&engine->timeline->lock, flags);
>       }
> 
>       return 0;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 436b5baf38c2..72f9497518bc 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1350,13 +1350,20 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>  static __always_inline void
>  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  {
> -     if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
> -             notify_ring(engine);
> +     bool tasklet = false;
> 
>       if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
>               set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -             tasklet_hi_schedule(&engine->irq_tasklet);
> +             tasklet = true;
>       }
> +
> +     if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
> +             notify_ring(engine);
> +             tasklet |= i915.enable_guc_submission;
> +     }
> +
> +     if (tasklet)
> +             tasklet_hi_schedule(&engine->irq_tasklet);
>  }
> 
>  static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 44a92ea464ba..c81d86afb52f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1278,7 +1278,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
> 
>       /* After a GPU reset, we may have requests to replay */
>       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -     if (!execlists_elsp_idle(engine)) {
> +     if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
>               engine->execlist_port[0].count = 0;
>               engine->execlist_port[1].count = 0;
>               execlists_submit_ports(engine);
> @@ -1345,9 +1345,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>       request->ring->last_retired_head = -1;
>       intel_ring_update_space(request->ring);
> 
> -     if (i915.enable_guc_submission)
> -             return;
> -
>       /* Catch up with any missed context-switch interrupts */
>       if (request->ctx != port[0].request->ctx) {
>               i915_gem_request_put(port[0].request);
> --
> 2.11.0
> 