On Thu, Nov 26, 2015 at 09:19:44AM +0000, Nick Hoath wrote:
> On 26/11/2015 08:48, Daniel Vetter wrote:
> >On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:
> >>Nick Hoath <[email protected]> writes:
> >>
> >>>Use the first retired request on a new context to unpin
> >>>the old context. This ensures that the hw context remains
> >>>bound until it has been written back to by the GPU.
> >>>Now that the context is pinned until later in the request/context
> >>>lifecycle, it no longer needs to be pinned from context_queue to
> >>>retire_requests.
> >>>
> >>>v2: Moved the new pin to cover GuC submission (Alex Dai)
> >>>     Moved the new unpin to request_retire to fix coverage leak
> >>>v3: Added switch to default context if freeing a still pinned
> >>>     context just in case the hw was actually still using it
> >>>v4: Unwrapped context unpin to allow calling without a request
> >>>v5: Only create a switch to idle context if the ring doesn't
> >>>     already have a request pending on it (Alex Dai)
> >>>     Rename unsaved to dirty to avoid double negatives (Dave Gordon)
> >>>     Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
> >>>     Split out per engine cleanup from context_free as it
> >>>     was getting unwieldy
> >>>     Corrected locking (Dave Gordon)
> >>>
> >>>Signed-off-by: Nick Hoath <[email protected]>
> >>>Issue: VIZ-4277
> >>>Cc: Daniel Vetter <[email protected]>
> >>>Cc: David Gordon <[email protected]>
> >>>Cc: Chris Wilson <[email protected]>
> >>>Cc: Alex Dai <[email protected]>
> >>>---
> >>>  drivers/gpu/drm/i915/i915_drv.h  |   1 +
> >>>  drivers/gpu/drm/i915/i915_gem.c  |   3 +
> >>>  drivers/gpu/drm/i915/intel_lrc.c | 124 
> >>> +++++++++++++++++++++++++++++++--------
> >>>  drivers/gpu/drm/i915/intel_lrc.h |   1 +
> >>>  4 files changed, 105 insertions(+), 24 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> >>>b/drivers/gpu/drm/i915/i915_drv.h
> >>>index d5cf30b..e82717a 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>@@ -889,6 +889,7 @@ struct intel_context {
> >>>   struct {
> >>>           struct drm_i915_gem_object *state;
> >>>           struct intel_ringbuffer *ringbuf;
> >>>+          bool dirty;
> >>>           int pin_count;
> >>>   } engine[I915_NUM_RINGS];
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> >>>b/drivers/gpu/drm/i915/i915_gem.c
> >>>index e955499..3829bc1 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gem.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>@@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct 
> >>>drm_i915_gem_request *request)
> >>>  {
> >>>   trace_i915_gem_request_retire(request);
> >>>
> >>>+  if (i915.enable_execlists)
> >>>+          intel_lr_context_complete_check(request);
> >>>+
> >>>   /* We know the GPU must have read the request to have
> >>>    * sent us the seqno + interrupt, so use the position
> >>>    * of tail of the request to update the last known position
> >>>diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> >>>b/drivers/gpu/drm/i915/intel_lrc.c
> >>>index 06180dc..03d5bca 100644
> >>>--- a/drivers/gpu/drm/i915/intel_lrc.c
> >>>+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>>@@ -566,9 +566,6 @@ static int execlists_context_queue(struct 
> >>>drm_i915_gem_request *request)
> >>>   struct drm_i915_gem_request *cursor;
> >>>   int num_elements = 0;
> >>>
> >>>-  if (request->ctx != ring->default_context)
> >>>-          intel_lr_context_pin(request);
> >>>-
> >>>   i915_gem_request_reference(request);
> >>>
> >>>   spin_lock_irq(&ring->execlist_lock);
> >>>@@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct 
> >>>drm_i915_gem_request *request)
> >>>   if (intel_ring_stopped(ring))
> >>>           return;
> >>>
> >>>+  if (request->ctx != ring->default_context) {
> >>>+          if (!request->ctx->engine[ring->id].dirty) {
> >>>+                  intel_lr_context_pin(request);
> >>>+                  request->ctx->engine[ring->id].dirty = true;
> >>>+          }
> >>>+  }
> >>>+
> >>>   if (dev_priv->guc.execbuf_client)
> >>>           i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >>>   else
> >>>@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct 
> >>>intel_engine_cs *ring)
> >>>   spin_unlock_irq(&ring->execlist_lock);
> >>>
> >>>   list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> >>>-          struct intel_context *ctx = req->ctx;
> >>>-          struct drm_i915_gem_object *ctx_obj =
> >>>-                          ctx->engine[ring->id].state;
> >>>-
> >>>-          if (ctx_obj && (ctx != ring->default_context))
> >>>-                  intel_lr_context_unpin(req);
> >>>           list_del(&req->execlist_link);
> >>>           i915_gem_request_unreference(req);
> >>>   }
> >>>@@ -1058,21 +1056,39 @@ reset_pin_count:
> >>>   return ret;
> >>>  }
> >>>
> >>>-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> >>>+static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> >>>+          struct intel_context *ctx)
> >>>  {
> >>>-  struct intel_engine_cs *ring = rq->ring;
> >>>-  struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> >>>-  struct intel_ringbuffer *ringbuf = rq->ringbuf;
> >>>-
> >>>+  struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> >>>+  struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> >>>   if (ctx_obj) {
> >>>           WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> >>>-          if (--rq->ctx->engine[ring->id].pin_count == 0) {
> >>>+          if (--ctx->engine[ring->id].pin_count == 0) {
> >>>                   intel_unpin_ringbuffer_obj(ringbuf);
> >>>                   i915_gem_object_ggtt_unpin(ctx_obj);
> >>>           }
> >>>   }
> >>>  }
> >>>
> >>>+void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> >>>+{
> >>>+  __intel_lr_context_unpin(rq->ring, rq->ctx);
> >>>+}
> >>>+
> >>>+void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> >>>+{
> >>>+  struct intel_engine_cs *ring = req->ring;
> >>>+
> >>>+  if (ring->last_context && ring->last_context != req->ctx &&
> >>>+                  ring->last_context->engine[ring->id].dirty) {
> >>>+          __intel_lr_context_unpin(
> >>>+                          ring,
> >>>+                          ring->last_context);
> >>>+          ring->last_context->engine[ring->id].dirty = false;
> >>>+  }
> >>>+  ring->last_context = req->ctx;
> >>
> >>What ensures that the last_context stays alive?
> >
> >The other part that's missing compared to the legacy context cycling logic
> >is move_to_active. Without that the shrinker can't eat into retired
> >contexts, which renders this exercise fairly moot imo.
> >
> >The overall idea is that since i915 does dynamic memory management, and
> >that infects everything in the code that deals with gpu objects, we need
> >to make sure that everyone is using the same logic to cycle through
> >active objects. Otherwise the shrinker will be an unmaintainable hydra
> >with per-feature special cases.
> >-Daniel
> 
> This is the first part of the VIZ-4277 fix changeset that I'm working on. It
> has been 'fasttracked' as it is needed to fix a lockup with GuC submission.
> If you like, I can update the commit message to explain that?

Yeah if this is a minimal bugfix for some issue then this definitely
should be mentioned in the commit message, including a precise description
of what and how exactly the current code blows up.
-Daniel

> 
> >
> >>
> >>>+}
> >>>+
> >>>  static int intel_logical_ring_workarounds_emit(struct 
> >>> drm_i915_gem_request *req)
> >>>  {
> >>>   int ret, i;
> >>>@@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, 
> >>>struct drm_i915_gem_object *ctx_o
> >>>  }
> >>>
> >>>  /**
> >>>+ * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> >>>+ * @ctx: the LR context being freed.
> >>>+ * @ring: the engine being cleaned
> >>>+ * @ctx_obj: the hw context being unreferenced
> >>>+ * @ringbuf: the ringbuf being freed
> >>>+ *
> >>>+ * Take care of cleaning up the per-engine backing
> >>>+ * objects and the logical ringbuffer.
> >>>+ */
> >>>+static void
> >>>+intel_lr_context_clean_ring(struct intel_context *ctx,
> >>>+                      struct intel_engine_cs *ring,
> >>>+                      struct drm_i915_gem_object *ctx_obj,
> >>>+                      struct intel_ringbuffer *ringbuf)
> >>>+{
> >>>+  WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> >>>+
> >>>+  if (ctx == ring->default_context) {
> >>>+          intel_unpin_ringbuffer_obj(ringbuf);
> >>>+          i915_gem_object_ggtt_unpin(ctx_obj);
> >>>+  }
> >>>+
> >>>+  if (ctx->engine[ring->id].dirty) {
> >>>+          struct drm_i915_gem_request *req = NULL;
> >>>+
> >>>+          /**
> >>>+           * If there is already a request pending on
> >>>+           * this ring, wait for that to complete,
> >>>+           * otherwise create a switch to idle request
> >>>+           */
> >>>+          if (list_empty(&ring->request_list)) {
> >>>+                  int ret;
> >>>+
> >>>+                  ret = i915_gem_request_alloc(
> >>>+                                  ring,
> >>>+                                  ring->default_context,
> >>>+                                  &req);
> >>>+                  if (!ret)
> >>>+                          i915_add_request(req);
> >>>+                  else
> >>>+                          DRM_DEBUG("Failed to ensure context saved");
> >>>+          } else {
> >>>+                  req = list_first_entry(
> >>>+                                  &ring->request_list,
> >>>+                                  typeof(*req), list);
> >>>+          }
> >>>+          if (req)
> >>>+                  i915_wait_request(req);
> >>>+  }
> >>>+
> >>>+  WARN_ON(ctx->engine[ring->id].pin_count);
> >>>+  intel_ringbuffer_free(ringbuf);
> >>>+  drm_gem_object_unreference(&ctx_obj->base);
> >>>+}
> >>>+
> >>>+/**
> >>>   * intel_lr_context_free() - free the LRC specific bits of a context
> >>>   * @ctx: the LR context to free.
> >>>   *
> >>>@@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context 
> >>>*ctx)
> >>>  {
> >>>   int i;
> >>>
> >>>-  for (i = 0; i < I915_NUM_RINGS; i++) {
> >>>+  for (i = 0; i < I915_NUM_RINGS; ++i) {
> >>>           struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> >>>
> >>>-          if (ctx_obj) {
> >>>+          if (ctx->engine[i].state) {
> >>
> >>++i and the above are superfluous?
> >>
> >>-Mika
> >>
> >>>                   struct intel_ringbuffer *ringbuf =
> >>>                                   ctx->engine[i].ringbuf;
> >>>                   struct intel_engine_cs *ring = ringbuf->ring;
> >>>
> >>>-                  if (ctx == ring->default_context) {
> >>>-                          intel_unpin_ringbuffer_obj(ringbuf);
> >>>-                          i915_gem_object_ggtt_unpin(ctx_obj);
> >>>-                  }
> >>>-                  WARN_ON(ctx->engine[ring->id].pin_count);
> >>>-                  intel_ringbuffer_free(ringbuf);
> >>>-                  drm_gem_object_unreference(&ctx_obj->base);
> >>>+                  intel_lr_context_clean_ring(ctx,
> >>>+                                              ring,
> >>>+                                              ctx_obj,
> >>>+                                              ringbuf);
> >>>           }
> >>>   }
> >>>  }
> >>>@@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
> >>>
> >>>           ringbuf->head = 0;
> >>>           ringbuf->tail = 0;
> >>>+
> >>>+          if (ctx->engine[ring->id].dirty) {
> >>>+                  __intel_lr_context_unpin(
> >>>+                                  ring,
> >>>+                                  ctx);
> >>>+                  ctx->engine[ring->id].dirty = false;
> >>>+          }
> >>>   }
> >>>  }
> >>>diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
> >>>b/drivers/gpu/drm/i915/intel_lrc.h
> >>>index 4e60d54..cbc42b8 100644
> >>>--- a/drivers/gpu/drm/i915/intel_lrc.h
> >>>+++ b/drivers/gpu/drm/i915/intel_lrc.h
> >>>@@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
> >>>                   struct intel_context *ctx);
> >>>  uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
> >>>                                struct intel_engine_cs *ring);
> >>>+void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
> >>>
> >>>  /* Execlists */
> >>>  int intel_sanitize_enable_execlists(struct drm_device *dev, int 
> >>> enable_execlists);
> >>>--
> >>>1.9.1
> >>>
> >>>_______________________________________________
> >>>Intel-gfx mailing list
> >>>[email protected]
> >>>http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >>_______________________________________________
> >>Intel-gfx mailing list
> >>[email protected]
> >>http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to