Am 18.08.2016 um 13:17 schrieb Chunming Zhou:
V2:
1. don't directly submit to many jobs at the same time.
2. delete unrelated printk.
Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
Signed-off-by: Chunming Zhou <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++++++++++++++++++++++++++++-
1 file changed, 57 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c9b15c0..0bedb26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2120,6 +2120,35 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
return amdgpu_lockup_timeout > 0 ? true : false;
}
+static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_bo *bo,
+ struct fence **fence)
+{
+ uint32_t domain;
+ int r;
+
+ if (!bo->shadow)
+ return 0;
+
+ r = amdgpu_bo_reserve(bo, false);
+ if (r)
+ return r;
+ domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+ /* if bo has been evicted, then no need to recover */
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
+ NULL, fence, true);
+ if (r) {
+ DRM_ERROR("recover page table failed!\n");
+ goto err;
+ }
+ }
+err:
+ amdgpu_bo_unreserve(bo);
+ return r;
+}
+
/**
* amdgpu_gpu_reset - reset the asic
*
@@ -2202,13 +2231,40 @@ retry:
if (r) {
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
r = amdgpu_suspend(adev);
+ need_full_reset = true;
goto retry;
}
-
+ /**
+ * recovery vm page tables, since we cannot depend on VRAM is
+ * consistent after gpu full reset.
+ */
+ if (need_full_reset && amdgpu_need_backup(adev)) {
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_bo *bo, *tmp;
+ struct fence *fence = NULL, *next = NULL;
+
+ DRM_INFO("recover vram bo from shadow\n");
+ mutex_lock(&adev->shadow_list_lock);
+ list_for_each_entry_safe(bo, tmp, &adev->shadow_list,
shadow_list) {
+ amdgpu_recover_vram_from_shadow(adev, ring, bo,
&next);
+ if (fence)
+ fence_wait(fence, false);
You should check the return code here as well and abort the loop if
anything fails.
With that fixed the patch is Reviewed-by: Christian König
<[email protected]> as well.
Finally done with that!
Cheers,
Christian.
+ fence_put(fence);
+ fence = next;
+ }
+ mutex_unlock(&adev->shadow_list_lock);
+ if (fence) {
+ r = fence_wait(fence, false);
+ if (r)
+ WARN(r, "recovery from shadow isn't
comleted\n");
+ }
+ fence_put(fence);
+ }
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring)
continue;
+
amd_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
}
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx