Re: Any insight on drm resetting chip for stopped heartbeat error

Jonathan Gray Sun, 06 Dec 2020 17:44:57 -0800

On Sun, Dec 06, 2020 at 11:44:11AM -0800, Joseph Olatt wrote:
> Hi,
> 
> I've started seeing the following error on my laptop along with
> associated temporary freezing of the system:
> 
>   drm:pid90783:intel_gt_reset *NOTICE* Resetting chip for stopped
>   heartbeat on rcs0
>   drm:pid90783:mark_guilty *NOTICE* Xorg[83345] context reset due to GPU
>   hang
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   i915_vma_coredump_create: stub
>   pool_fini: stub
>   err_free_sgl: stub


These messages are what inteldrm prints after it has detected a GPU hang.
Was anything in particular running at that point?

This diff against -current should help with some Haswell problems,
but perhaps not the one you are seeing.

https://patchwork.freedesktop.org/patch/395580/?series=82783&rev=1
https://gitlab.freedesktop.org/drm/intel/-/issues/2024

Index: sys/dev/pci/drm/i915/gt/gen7_renderclear.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/gen7_renderclear.c,v
retrieving revision 1.1
diff -u -p -r1.1 gen7_renderclear.c
--- sys/dev/pci/drm/i915/gt/gen7_renderclear.c  8 Jun 2020 04:48:13 -0000       1.1
+++ sys/dev/pci/drm/i915/gt/gen7_renderclear.c  28 Nov 2020 02:50:26 -0000
@@ -7,8 +7,6 @@
 #include "i915_drv.h"
 #include "intel_gpu_commands.h"
 
-#define MAX_URB_ENTRIES 64
-#define STATE_SIZE (4 * 1024)
 #define GT3_INLINE_DATA_DELAYS 0x1E00
 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))
 
@@ -34,8 +32,7 @@ struct batch_chunk {
 };
 
 struct batch_vals {
-       u32 max_primitives;
-       u32 max_urb_entries;
+       u32 max_primitives; /* == number of VFE threads */
        u32 cmd_size;
        u32 state_size;
        u32 state_start;
@@ -50,18 +47,35 @@ static void
 batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
 {
        if (IS_HASWELL(i915)) {
-               bv->max_primitives = 280;
-               bv->max_urb_entries = MAX_URB_ENTRIES;
+               switch (INTEL_INFO(i915)->gt) {
+               default:
+               case 1:
+                       bv->max_primitives = 70;
+                       break;
+               case 2:
+                       bv->max_primitives = 140;
+                       break;
+               case 3:
+                       bv->max_primitives = 280;
+                       break;
+               }
                bv->surface_height = 16 * 16;
                bv->surface_width = 32 * 2 * 16;
        } else {
-               bv->max_primitives = 128;
-               bv->max_urb_entries = MAX_URB_ENTRIES / 2;
+               switch (INTEL_INFO(i915)->gt) {
+               default:
+               case 1: /* including vlv */
+                       bv->max_primitives = 36;
+                       break;
+               case 2:
+                       bv->max_primitives = 128;
+                       break;
+               }
                bv->surface_height = 16 * 8;
                bv->surface_width = 32 * 16;
        }
        bv->cmd_size = bv->max_primitives * 4096;
-       bv->state_size = STATE_SIZE;
+       bv->state_size = SZ_4K;
        bv->state_start = bv->cmd_size;
        bv->batch_size = bv->cmd_size + bv->state_size;
        bv->scratch_size = bv->surface_height * bv->surface_width;
@@ -244,7 +258,6 @@ gen7_emit_vfe_state(struct batch_chunk *
                    u32 urb_size, u32 curbe_size,
                    u32 mode)
 {
-       u32 urb_entries = bv->max_urb_entries;
        u32 threads = bv->max_primitives - 1;
        u32 *cs = batch_alloc_items(batch, 32, 8);
 
@@ -254,7 +267,7 @@ gen7_emit_vfe_state(struct batch_chunk *
        *cs++ = 0;
 
        /* number of threads & urb entries for GPGPU vs Media Mode */
-       *cs++ = threads << 16 | urb_entries << 8 | mode << 2;
+       *cs++ = threads << 16 | 1 << 8 | mode << 2;
 
        *cs++ = 0;

Re: Any insight on drm resetting chip for stopped heartbeat error

Reply via email to