If one thread holds the mmap read lock and a second thread is waiting to
acquire the write lock, then no other thread can acquire the read lock
while the writer is stalled. This causes the deadlock case below:

thread 1: prefetch range migrate to VRAM, take mmap read lock
thread 2: svm_range_evict_svm_bo_worker, migrate to RAM, take mmap read
lock
thread 3: svm_range_restore_work, for xnack off case only, take mmap
write lock to flush deferred list

To avoid this deadlock, use mmap_write_trylock instead; on lock
contention, sleep briefly and then try the lock again.

Signed-off-by: Philip Yang <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b71d47afd243..1983849c4070 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1608,22 +1608,25 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
  * @svms: the svm range list
  * @mm: the mm structure
  *
- * Context: Returns with mmap write lock held, pending deferred work flushed
+ * Context: Non-atomic context, may sleep
  *
+ * Returns with mmap write lock held, pending deferred work flushed
  */
 void
 svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
                                   struct mm_struct *mm)
 {
-retry_flush_work:
-       flush_work(&svms->deferred_list_work);
-       mmap_write_lock(mm);
-
-       if (list_empty(&svms->deferred_range_list))
-               return;
-       mmap_write_unlock(mm);
-       pr_debug("retry flush\n");
-       goto retry_flush_work;
+       while (true) {
+               flush_work(&svms->deferred_list_work);
+               if (!mmap_write_trylock(mm)) {
+                       usleep_range(1000, 2000);
+                       continue;
+               }
+               if (list_empty(&svms->deferred_range_list))
+                       return;
+               mmap_write_unlock(mm);
+               pr_debug("retry flush\n");
+       }
 }
 
 static void svm_range_restore_work(struct work_struct *work)
-- 
2.17.1

Reply via email to