Chris Wilson <[email protected]> writes:

> The engine also provides a mirror of the CSB write pointer in the HWSP,
> but not of our read pointer. To take advantage of this we need to
> remember where we read up to on the last interrupt and continue off from
> there. This poses a problem following a reset, as we don't know where
> the hw will start writing from, and due to the use of power contexts we
> cannot perform that query during the reset itself. So we continue the
> current modus operandi of delaying the first read of the context-status
> read/write pointers until after the first interrupt. With this we should
> now have eliminated all uncached mmio reads in handling the
> context-status interrupt, though we still have the uncached mmio writes
> for submitting new work, and many uncached mmio reads in the global
> interrupt handler itself. Still a step in the right direction towards
> reducing our resubmit latency, although it appears lost in the noise!
>
> v2: Cannonlake moved the CSB write index
> v3: Include the sw/hwsp state in debugfs/i915_engine_info
> v4: Also revert to using CSB mmio for GVT-g
>
> Signed-off-by: Chris Wilson <[email protected]>
> Cc: Michel Thierry <[email protected]>
> Cc: Tvrtko Ursulin <[email protected]>
> Cc: Mika Kuoppala <[email protected]>
> Cc: Daniele Ceraolo Spurio <[email protected]>
> Cc: Zhenyu Wang <[email protected]>
> Cc: Zhi Wang <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |  6 ++++--
>  drivers/gpu/drm/i915/i915_drv.h         |  8 ++++++++
>  drivers/gpu/drm/i915/intel_lrc.c        | 25 ++++++++++++++++++++-----
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
>  4 files changed, 35 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5fd01c14a3ec..552aef61b47b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3395,8 +3395,10 @@ static int i915_engine_info(struct seq_file *m, void 
> *unused)
>                       ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
>                       read = GEN8_CSB_READ_PTR(ptr);
>                       write = GEN8_CSB_WRITE_PTR(ptr);
> -                     seq_printf(m, "\tExeclist CSB read %d, write %d\n",
> -                                read, write);
> +                     seq_printf(m, "\tExeclist CSB read %d [%d cached], 
> write %d [%d from hws]\n",
> +                                read, engine->csb_head,
> +                                write,
> +                                intel_read_status_page(engine, 
> intel_hws_csb_write_index(engine->i915)));
>                       if (read >= GEN8_CSB_ENTRIES)
>                               read = 0;
>                       if (write >= GEN8_CSB_ENTRIES)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 81cd21ecfa7d..f62c9db8a9a8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4228,4 +4228,12 @@ static inline bool i915_gem_object_is_coherent(struct 
> drm_i915_gem_object *obj)
>               HAS_LLC(to_i915(obj->base.dev)));
>  }
>  
> +static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
> +{
> +     if (INTEL_GEN(i915) >= 10)
> +             return CNL_HWS_CSB_WRITE_INDEX;
> +     else
> +             return I915_HWS_CSB_WRITE_INDEX;
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b721f65d232..7c3dce27e504 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -556,6 +556,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>               if (unlikely(intel_vgpu_active(dev_priv))) {
>                       buf = (u32 * __force)
>                               (dev_priv->regs + 
> i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> +                     engine->csb_head = -1;
>               }
>  
>               /* The write will be ordered by the uncached read (itself
> @@ -569,9 +570,19 @@ static void intel_lrc_irq_handler(unsigned long data)
>                * is set and we do a new loop.
>                */
>               __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -             head = readl(csb_mmio);
> -             tail = GEN8_CSB_WRITE_PTR(head);
> -             head = GEN8_CSB_READ_PTR(head);
> +             if (unlikely(engine->csb_head == -1)) { /* following a reset */
> +                     head = readl(csb_mmio);
> +                     tail = GEN8_CSB_WRITE_PTR(head);
> +                     head = GEN8_CSB_READ_PTR(head);
> +                     engine->csb_head = head;
> +             } else {
> +                     const int write_idx =
> +                             intel_hws_csb_write_index(dev_priv) -
> +                             I915_HWS_CSB_BUF0_INDEX;
> +
> +                     head = engine->csb_head;
> +                     tail = buf[write_idx];
> +             }

I have discussed this with Chris already in irc but I have a kbl
that can't survive the patch. The hwsp tail seems to update
inside the loop without a corresponding interrupt.

And it results in a system hang instead of hangcheck firing,
which is another mystery by itself.

Resampling the tail inside the loop makes it work.

-Mika

>               while (head != tail) {
>                       struct drm_i915_gem_request *rq;
>                       unsigned int status;
> @@ -625,8 +636,11 @@ static void intel_lrc_irq_handler(unsigned long data)
>                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>               }
>  
> -             writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> -                    csb_mmio);
> +             if (head != engine->csb_head) {
> +                     engine->csb_head = head;
> +                     writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> +                            csb_mmio);
> +             }
>       }
>  
>       if (execlists_elsp_ready(engine))
> @@ -1253,6 +1267,7 @@ static int gen8_init_common_ring(struct intel_engine_cs 
> *engine)
>  
>       /* After a GPU reset, we may have requests to replay */
>       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> +     engine->csb_head = -1;
>  
>       submit = false;
>       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
> b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 2c55cfa14fb5..a182da7eb9a9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -391,6 +391,7 @@ struct intel_engine_cs {
>       struct rb_root execlist_queue;
>       struct rb_node *execlist_first;
>       unsigned int fw_domains;
> +     unsigned int csb_head;
>  
>       /* Contexts are pinned whilst they are active on the GPU. The last
>        * context executed remains active whilst the GPU is idle - the
> @@ -497,6 +498,8 @@ intel_write_status_page(struct intel_engine_cs *engine, 
> int reg, u32 value)
>  #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << 
> MI_STORE_DWORD_INDEX_SHIFT)
>  
>  #define I915_HWS_CSB_BUF0_INDEX              0x10
> +#define I915_HWS_CSB_WRITE_INDEX     0x1f
> +#define CNL_HWS_CSB_WRITE_INDEX              0x2f
>  
>  struct intel_ring *
>  intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> -- 
> 2.13.2
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to