On Fri, Aug 05, 2016 at 10:06:04AM +0100, Chris Wilson wrote:
> Our error states are quickly growing, pinning kernel memory with them.
> The majority of the space is taken up by the error objects. These
> compress well using zlib and without decode are mostly meaningless, so
> encoding them does not hinder quickly parsing the error state for
> familiarity.
> 
> Signed-off-by: Chris Wilson <[email protected]>

Seems to also contain a wholesale rework of the capture logic using a ggtt
mappable entry. Would explain why the missing clflush isn't an issue for
you. Imo best if that part is reordered as the first patch (or at least
before stop_machine, which requires the removal of cflush), and then the
zlib on top.

On the idea itself, since I have no clue: How do we uncompress these
again? Patched intel_error_decode, or can zlib deal with in-line streams?
-Daniel

> ---
>  drivers/gpu/drm/i915/Kconfig          |   1 +
>  drivers/gpu/drm/i915/i915_drv.h       |   4 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c   |  10 ++
>  drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
>  drivers/gpu/drm/i915/i915_gpu_error.c | 271 
> +++++++++++++++++-----------------
>  5 files changed, 148 insertions(+), 140 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7badcee88ebf..c8ea20526aef 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -5,6 +5,7 @@ config DRM_I915
>       select INTEL_GTT
>       select INTERVAL_TREE
>       select STOP_MACHINE
> +     select ZLIB_DEFLATE
>       # we need shmfs for the swappable backing store, and in particular
>       # the shmem_readpage() which depends upon tmpfs
>       select SHMEM
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 370a4c9eea70..edc1e6d6be0d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -534,6 +534,7 @@ struct drm_i915_error_state {
>               u32 tail;
>               u32 head;
>               u32 ctl;
> +             u32 mode;
>               u32 hws;
>               u32 ipeir;
>               u32 ipehr;
> @@ -550,9 +551,10 @@ struct drm_i915_error_state {
>               u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
>  
>               struct drm_i915_error_object {
> -                     int page_count;
>                       u64 gtt_offset;
>                       u64 gtt_size;
> +                     int page_count;
> +                     int unused;
>                       u32 *pages[0];
>               } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7c9f654d515a..c45e7f456cea 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2737,6 +2737,15 @@ int i915_gem_init_ggtt(struct drm_i915_private 
> *dev_priv)
>       if (ret)
>               return ret;
>  
> +     /* Reserve a mappable slot for our lockless error capture */
> +     ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
> +                                               &ggtt->gpu_error,
> +                                               4096, 0, -1,
> +                                               0, ggtt->mappable_end,
> +                                               0, 0);
> +     if (ret)
> +             return ret;
> +
>       /* Clear any non-preallocated blocks */
>       drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
>               DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> @@ -2804,6 +2813,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private 
> *dev_priv)
>       if (drm_mm_initialized(&ggtt->base.mm)) {
>               intel_vgt_deballoon(dev_priv);
>  
> +             drm_mm_remove_node(&ggtt->gpu_error);
>               drm_mm_takedown(&ggtt->base.mm);
>               list_del(&ggtt->base.global_link);
>       }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
> b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index c54ae2323df3..010e10c0b62b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -428,6 +428,8 @@ struct i915_ggtt {
>       bool do_idle_maps;
>  
>       int mtrr;
> +
> +     struct drm_mm_node gpu_error;
>  };
>  
>  struct i915_hw_ppgtt {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6078f47d4bc0..f036584e55e3 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -29,6 +29,7 @@
>  
>  #include <generated/utsrelease.h>
>  #include <linux/stop_machine.h>
> +#include <linux/zlib.h>
>  #include "i915_drv.h"
>  
>  static const char *engine_str(int engine)
> @@ -237,6 +238,8 @@ static void error_print_engine(struct 
> drm_i915_error_state_buf *m,
>       err_printf(m, "  HEAD:  0x%08x\n", ee->head);
>       err_printf(m, "  TAIL:  0x%08x\n", ee->tail);
>       err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
> +     err_printf(m, "  MODE:  0x%08x [idle? %d]\n",
> +                ee->mode, !!(ee->mode & MODE_IDLE));
>       err_printf(m, "  HWS:   0x%08x\n", ee->hws);
>       err_printf(m, "  ACTHD: 0x%08x %08x\n",
>                  (u32)(ee->acthd>>32), (u32)ee->acthd);
> @@ -307,18 +310,46 @@ void i915_error_printf(struct drm_i915_error_state_buf 
> *e, const char *f, ...)
>       va_end(args);
>  }
>  
> +static bool
> +ascii85_encode(u32 in, char *out)
> +{
> +     int i;
> +
> +     if (in == 0)
> +             return false;
> +
> +     out[5] = '\0';
> +     for (i = 5; i--; ) {
> +             out[i] = '!' + in % 85;
> +             in /= 85;
> +     }
> +
> +     return true;
> +}
> +
>  static void print_error_obj(struct drm_i915_error_state_buf *m,
>                           struct drm_i915_error_object *obj)
>  {
> -     int page, offset, elt;
> +     char out[6];
> +     int page;
> +
> +     err_puts(m, ":"); /* indicate compressed data */
> +     for (page = 0; page < obj->page_count; page++) {
> +             int i, len;
> +
> +             len = PAGE_SIZE;
> +             if (page == obj->page_count - 1)
> +                     len -= obj->unused;
> +             len = (len + 3) / 4;
>  
> -     for (page = offset = 0; page < obj->page_count; page++) {
> -             for (elt = 0; elt < PAGE_SIZE/4; elt++) {
> -                     err_printf(m, "%08x :  %08x\n", offset,
> -                                obj->pages[page][elt]);
> -                     offset += 4;
> +             for (i = 0; i < len; i++) {
> +                     if (ascii85_encode(obj->pages[page][i], out))
> +                             err_puts(m, out);
> +                     else
> +                             err_puts(m, "z");
>               }
>       }
> +     err_puts(m, "\n");
>  }
>  
>  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> @@ -328,8 +359,8 @@ int i915_error_state_to_str(struct 
> drm_i915_error_state_buf *m,
>       struct drm_i915_private *dev_priv = to_i915(dev);
>       struct drm_i915_error_state *error = error_priv->error;
>       struct drm_i915_error_object *obj;
> -     int i, j, offset, elt;
>       int max_hangcheck_score;
> +     int i, j;
>  
>       if (!error) {
>               err_printf(m, "no error state collected\n");
> @@ -481,75 +512,33 @@ int i915_error_state_to_str(struct 
> drm_i915_error_state_buf *m,
>               }
>  
>               if ((obj = ee->ringbuffer)) {
> -                     err_printf(m, "%s --- ringbuffer = 0x%08x\n",
> -                                dev_priv->engine[i].name,
> -                                lower_32_bits(obj->gtt_offset));
> +                     err_printf(m, "%s --- ringbuffer = 0x%08llx\n",
> +                                dev_priv->engine[i].name, obj->gtt_offset);
>                       print_error_obj(m, obj);
>               }
>  
> -             if ((obj = ee->hws_page)) {
> -                     u64 hws_offset = obj->gtt_offset;
> -                     u32 *hws_page = &obj->pages[0][0];
> -
> -                     if (i915.enable_execlists) {
> -                             hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
> -                             hws_page = &obj->pages[LRC_PPHWSP_PN][0];
> -                     }
> -                     err_printf(m, "%s --- HW Status = 0x%08llx\n",
> -                                dev_priv->engine[i].name, hws_offset);
> -                     offset = 0;
> -                     for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> -                             err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -                                        offset,
> -                                        hws_page[elt],
> -                                        hws_page[elt+1],
> -                                        hws_page[elt+2],
> -                                        hws_page[elt+3]);
> -                             offset += 16;
> -                     }
> +             if ((obj = ee->wa_ctx)) {
> +                     err_printf(m, "%s --- WA Context = 0x%08llx\n",
> +                                dev_priv->engine[i].name, obj->gtt_offset);
> +                     print_error_obj(m, obj);
>               }
>  
> -             obj = ee->wa_ctx;
> -             if (obj) {
> -                     u64 wa_ctx_offset = obj->gtt_offset;
> -                     u32 *wa_ctx_page = &obj->pages[0][0];
> -                     struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> -                     u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
> -                                        engine->wa_ctx.per_ctx.size);
> -
> -                     err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
> -                                dev_priv->engine[i].name, wa_ctx_offset);
> -                     offset = 0;
> -                     for (elt = 0; elt < wa_ctx_size; elt += 4) {
> -                             err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -                                        offset,
> -                                        wa_ctx_page[elt + 0],
> -                                        wa_ctx_page[elt + 1],
> -                                        wa_ctx_page[elt + 2],
> -                                        wa_ctx_page[elt + 3]);
> -                             offset += 16;
> -                     }
> +             if ((obj = ee->hws_page)) {
> +                     err_printf(m, "%s --- HW Status = 0x%08llx\n",
> +                                dev_priv->engine[i].name, obj->gtt_offset);
> +                     print_error_obj(m, obj);
>               }
>  
>               if ((obj = ee->ctx)) {
> -                     err_printf(m, "%s --- HW Context = 0x%08x\n",
> -                                dev_priv->engine[i].name,
> -                                lower_32_bits(obj->gtt_offset));
> +                     err_printf(m, "%s --- HW Context = 0x%08llx\n",
> +                                dev_priv->engine[i].name, obj->gtt_offset);
>                       print_error_obj(m, obj);
>               }
>       }
>  
>       if ((obj = error->semaphore_obj)) {
> -             err_printf(m, "Semaphore page = 0x%08x\n",
> -                        lower_32_bits(obj->gtt_offset));
> -             for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> -                     err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -                                elt * 4,
> -                                obj->pages[0][elt],
> -                                obj->pages[0][elt+1],
> -                                obj->pages[0][elt+2],
> -                                obj->pages[0][elt+3]);
> -             }
> +             err_printf(m, "Semaphore page = 0x%08llx\n", obj->gtt_offset);
> +             print_error_obj(m, obj);
>       }
>  
>       if (error->overlay)
> @@ -605,7 +594,7 @@ static void i915_error_object_free(struct 
> drm_i915_error_object *obj)
>               return;
>  
>       for (page = 0; page < obj->page_count; page++)
> -             kfree(obj->pages[page]);
> +             free_page((unsigned long)obj->pages[page]);
>  
>       kfree(obj);
>  }
> @@ -641,98 +630,107 @@ static void i915_error_state_free(struct kref 
> *error_ref)
>       kfree(error);
>  }
>  
> +static int compress_page(struct z_stream_s *zstream,
> +                      void *src,
> +                      struct drm_i915_error_object *dst)
> +{
> +     zstream->next_in = src;
> +     zstream->avail_in = PAGE_SIZE;
> +
> +     do {
> +             if (zstream->avail_out == 0) {
> +                     unsigned long page;
> +
> +                     page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> +                     if (!page)
> +                             return -ENOMEM;
> +
> +                     dst->pages[dst->page_count++] = (void *)page;
> +
> +                     zstream->next_out = (void *)page;
> +                     zstream->avail_out = PAGE_SIZE;
> +             }
> +
> +             if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
> +                     return -EIO;
> +
> +#if 0
> +             if (zstream->total_out > zstream->total_in)
> +                     return -E2BIG;
> +#endif
> +     } while (zstream->avail_in);
> +
> +     return 0;
> +}
> +
>  static struct drm_i915_error_object *
> -i915_error_object_create(struct drm_i915_private *dev_priv,
> +i915_error_object_create(struct drm_i915_private *i915,
>                        struct i915_vma *vma)
>  {
> -     struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -     struct drm_i915_gem_object *src;
> +     struct i915_ggtt *ggtt = &i915->ggtt;
> +     const u64 slot = ggtt->gpu_error.start;
>       struct drm_i915_error_object *dst;
> -     int num_pages;
> -     bool use_ggtt;
> -     int i = 0;
> -     u64 reloc_offset;
> +     struct z_stream_s zstream;
> +     unsigned long num_pages;
> +     struct sgt_iter iter;
> +     dma_addr_t dma;
>  
>       if (!vma)
>               return NULL;
>  
> -     src = vma->obj;
> -     if (!src->pages)
> -             return NULL;
> -
> -     num_pages = src->base.size >> PAGE_SHIFT;
> -
> -     dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
> +     num_pages = vma->size >> PAGE_SHIFT;
> +     num_pages = 10 * num_pages * sizeof(u32 *) >> 3;
> +     dst = kmalloc(sizeof(*dst) + num_pages, GFP_ATOMIC | __GFP_NOWARN);
>       if (!dst)
>               return NULL;
>  
>       dst->gtt_offset = vma->node.start;
> -     dst->gtt_size = vma->node.size;
> +     dst->page_count = 0;
> +     dst->unused = 0;
> +
> +     memset(&zstream, 0, sizeof(zstream));
> +     zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
> +                                                            MAX_MEM_LEVEL),
> +                                 GFP_ATOMIC | __GFP_NOWARN);
> +     if (!zstream.workspace ||
> +         zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
> +             kfree(zstream.workspace);
> +             kfree(dst);
> +             return NULL;
> +     }
>  
> -     reloc_offset = dst->gtt_offset;
> -     use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> -                (vma->flags & I915_VMA_GLOBAL_BIND) &&
> -                reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
> +     for_each_sgt_dma(dma, iter,
> +                      vma->ggtt_view.pages ?: vma->obj->pages) {
> +             int ret;
> +             void *s;
>  
> -     /* Cannot access stolen address directly, try to use the aperture */
> -     if (src->stolen) {
> -             use_ggtt = true;
> +             ggtt->base.insert_page(&ggtt->base, dma, slot,
> +                                    I915_CACHE_NONE, 0);
>  
> -             if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> -                     goto unwind;
> +             s = (void *__force)
> +                     io_mapping_map_atomic_wc(ggtt->mappable, slot);
> +             ret = compress_page(&zstream, s, dst);
> +             io_mapping_unmap_atomic(s);
>  
> -             reloc_offset = vma->node.start;
> -             if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
> +             if (ret)
>                       goto unwind;
>       }
> +     zlib_deflate(&zstream, Z_FINISH);
> +     dst->unused = zstream.avail_out;
> +out:
> +     zlib_deflateEnd(&zstream);
> +     kfree(zstream.workspace);
>  
> -     /* Cannot access snooped pages through the aperture */
> -     if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
> -         !HAS_LLC(dev_priv))
> -             goto unwind;
> -
> -     dst->page_count = num_pages;
> -     while (num_pages--) {
> -             void *d;
> -
> -             d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
> -             if (d == NULL)
> -                     goto unwind;
> -
> -             if (use_ggtt) {
> -                     void __iomem *s;
> -
> -                     /* Simply ignore tiling or any overlapping fence.
> -                      * It's part of the error state, and this hopefully
> -                      * captures what the GPU read.
> -                      */
> -
> -                     s = io_mapping_map_atomic_wc(ggtt->mappable,
> -                                                  reloc_offset);
> -                     memcpy_fromio(d, s, PAGE_SIZE);
> -                     io_mapping_unmap_atomic(s);
> -             } else {
> -                     struct page *page;
> -                     void *s;
> -
> -                     page = i915_gem_object_get_page(src, i);
> -
> -                     s = kmap_atomic(page);
> -                     memcpy(d, s, PAGE_SIZE);
> -                     kunmap_atomic(s);
> -             }
> -
> -             dst->pages[i++] = d;
> -             reloc_offset += PAGE_SIZE;
> -     }
> +     ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
>  
>       return dst;
>  
>  unwind:
> -     while (i--)
> -             kfree(dst->pages[i]);
> +     while (dst->page_count--)
> +             free_page((unsigned long)dst->pages[dst->page_count]);
>       kfree(dst);
> -     return NULL;
> +     dst = NULL;
> +     goto out;
>  }
>  
>  /* The error capture is special as tries to run underneath the normal
> @@ -979,6 +977,8 @@ static void error_record_engine_registers(struct 
> drm_i915_error_state *error,
>       ee->head = I915_READ_HEAD(engine);
>       ee->tail = I915_READ_TAIL(engine);
>       ee->ctl = I915_READ_CTL(engine);
> +     if (INTEL_GEN(dev_priv) > 2)
> +             ee->mode = I915_READ_MODE(engine);
>  
>       if (I915_NEED_GFX_HWS(dev_priv)) {
>               i915_reg_t mmio;
> @@ -1367,9 +1367,6 @@ static int capture(void *data)
>  {
>       struct drm_i915_error_state *error = data;
>  
> -     /* Ensure that what we readback from memory matches what the GPU sees */
> -     wbinvd();
> -
>       i915_capture_gen_state(error->i915, error);
>       i915_capture_reg_state(error->i915, error);
>       i915_gem_record_fences(error->i915, error);
> @@ -1383,9 +1380,6 @@ static int capture(void *data)
>       error->overlay = intel_overlay_capture_error_state(error->i915);
>       error->display = intel_display_capture_error_state(error->i915);
>  
> -     /* And make sure we don't leave trash in the CPU cache */
> -     wbinvd();
> -
>       return 0;
>  }
>  
> @@ -1459,7 +1453,6 @@ void i915_error_state_get(struct drm_device *dev,
>       if (error_priv->error)
>               kref_get(&error_priv->error->ref);
>       spin_unlock_irq(&dev_priv->gpu_error.lock);
> -
>  }
>  
>  void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to