Remove some redundant kernel messages that are emitted when we deduce
that the GPU has hung and capture the error state.

v2: Fix "hang" vs "no progress" message whilst I was there

Signed-off-by: Chris Wilson <[email protected]>
---
 drivers/gpu/drm/i915/i915_irq.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 34e25fc2b90a..860235d1e0bf 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3083,9 +3083,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
                container_of(work, typeof(*dev_priv),
                             gpu_error.hangcheck_work.work);
        struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       int busy_count = 0, rings_hung = 0;
-       bool stuck[I915_NUM_ENGINES] = { 0 };
+       unsigned hung = 0, stuck = 0;
+       int busy_count = 0;
 #define BUSY 1
 #define KICK 5
 #define HUNG 20
@@ -3103,7 +3102,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
         */
        intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
 
-       for_each_engine_id(engine, dev_priv, id) {
+       for_each_engine(engine, dev_priv) {
                bool busy = intel_engine_has_waiter(engine);
                u64 acthd;
                u32 seqno;
@@ -3166,10 +3165,15 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
                                        break;
                                case HANGCHECK_HUNG:
                                        engine->hangcheck.score += HUNG;
-                                       stuck[id] = true;
                                        break;
                                }
                        }
+
+                       if (engine->hangcheck.score >= 
HANGCHECK_SCORE_RING_HUNG) {
+                               hung |= intel_engine_flag(engine);
+                               if (engine->hangcheck.action != HANGCHECK_HUNG)
+                                       stuck |= intel_engine_flag(engine);
+                       }
                } else {
                        engine->hangcheck.action = HANGCHECK_ACTIVE;
 
@@ -3194,17 +3198,24 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
                busy_count += busy;
        }
 
-       for_each_engine_id(engine, dev_priv, id) {
-               if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) {
-                       DRM_INFO("%s on %s\n",
-                                stuck[id] ? "stuck" : "no progress",
-                                engine->name);
-                       rings_hung |= intel_engine_flag(engine);
-               }
-       }
+       if (hung) {
+               char msg[80];
+               int len;
 
-       if (rings_hung)
-               i915_handle_error(dev_priv, rings_hung, "Engine(s) hung");
+               /* If some rings hung but others were still busy, only
+                * blame the hanging rings in the synopsis.
+                */
+               if (stuck != hung)
+                       hung &= ~stuck;
+               len = snprintf(msg, sizeof(msg),
+                              "%s on ", stuck == hung ? "No progress" : 
"Hang");
+               for_each_engine_masked(engine, dev_priv, hung)
+                       len += snprintf(msg + len, sizeof(msg) - len,
+                                       "%s, ", engine->name);
+               msg[len-2] = '\0';
+
+               return i915_handle_error(dev_priv, hung, msg);
+       }
 
        /* Reset timer in case GPU hangs without another request being added */
        if (busy_count)
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to