We have seen a few cases where the whole memory subsystem is blocked
and flush operations never complete. When that happens, we want to:

- schedule a reset, so we can recover from this situation
- in the reset path, we need to reset the pending_reqs so we can send
  new commands after the reset
- if more panthor_gpu_flush_caches() operations are queued after
  the timeout, we skip them and return -EIO directly to avoid needless
  waits (the memory block won't miraculously work again)

v2:
- New patch

Fixes: 5cd894e258c4 ("drm/panthor: Add the GPU logical block")
Signed-off-by: Boris Brezillon <[email protected]>
---
 drivers/gpu/drm/panthor/panthor_gpu.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c 
b/drivers/gpu/drm/panthor/panthor_gpu.c
index eda670229184..abd2fde04da9 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -283,38 +283,42 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
 int panthor_gpu_flush_caches(struct panthor_device *ptdev,
                             u32 l2, u32 lsc, u32 other)
 {
-       bool timedout = false;
        unsigned long flags;
+       int ret = 0;
 
        /* Serialize cache flush operations. */
        guard(mutex)(&ptdev->gpu->cache_flush_lock);
 
        spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
-       if (!drm_WARN_ON(&ptdev->base,
-                        ptdev->gpu->pending_reqs & 
GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
+       if (!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
                ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED;
                gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other));
+       } else {
+               ret = -EIO;
        }
        spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
 
+       if (ret)
+               return ret;
+
        if (!wait_event_timeout(ptdev->gpu->reqs_acked,
                                !(ptdev->gpu->pending_reqs & 
GPU_IRQ_CLEAN_CACHES_COMPLETED),
                                msecs_to_jiffies(100))) {
                spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
                if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) 
!= 0 &&
                    !(gpu_read(ptdev, GPU_INT_RAWSTAT) & 
GPU_IRQ_CLEAN_CACHES_COMPLETED))
-                       timedout = true;
+                       ret = -ETIMEDOUT;
                else
                        ptdev->gpu->pending_reqs &= 
~GPU_IRQ_CLEAN_CACHES_COMPLETED;
                spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
        }
 
-       if (timedout) {
+       if (ret) {
+               panthor_device_schedule_reset(ptdev);
                drm_err(&ptdev->base, "Flush caches timeout");
-               return -ETIMEDOUT;
        }
 
-       return 0;
+       return ret;
 }
 
 /**
@@ -354,6 +358,7 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev)
                return -ETIMEDOUT;
        }
 
+       ptdev->gpu->pending_reqs = 0;
        return 0;
 }
 
-- 
2.51.1

Reply via email to