Also, I would recommend splitting the change into two:
a. Revert the status locks in one.
b. The rest of the changes in another one.

I agree with what Alex has proposed, as this is no longer just a plain revert.

Regards
Sunil Khatri

On 9/5/2025 10:19 PM, Alex Deucher wrote:
+ Philip

On Fri, Sep 5, 2025 at 7:27 AM Christian König
<ckoenig.leichtzumer...@gmail.com> wrote:
This reverts commit 0479956c94b1cfa6a1ab9206eff76072944ece8b.
I would come up with a new title.  At this point this is more than a
revert and I think calling it out like that could lead to problems in
stable kernels if someone just tries to revert the original commit in
an older kernel.  Moreover, 0479956c94b1 really isn't what is actually
being reverted.  That was just a renaming patch, so no functional
change.  The patch this is actually reverting is whatever patch added
the invalidated_lock in the first place.
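
(To pin down the commit that actually introduced the lock, something like
"git log --oneline -S invalidated_lock -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h"
should turn up both the original introduction and the later rename.)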

Maybe something like:

drm/amdgpu: rework VM invalidation locking

It turned out that protecting the status of each bo_va only with a
spinlock was just hiding problems instead of solving them.
Add a separate stats_lock and lockdep assertions that the correct
reservation lock is held all over the place. While at it also re-order
fields in the VM structure and try to improve the documentation a bit.


Additionally, is there a reason why the programming pattern changed in
the list handling?  Most of the instances of:

while (!list_empty()) {
     entry = list_first_entry();

got converted to:

list_for_each_entry_safe();

but not all.  It would be good to explain why, if there is a reason
other than just coding style.  For reference, a minimal sketch of the
difference between the two patterns is below.
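
(Illustrative only, not taken from the patch; entry/tmp are placeholder
names:)

	/* Re-reads the list head on every iteration, so it stays
	 * correct even when the lock protecting the list is dropped
	 * inside the loop and others add or remove entries meanwhile.
	 */
	spin_lock(&vm->invalidated_lock);
	while (!list_empty(&vm->invalidated)) {
		entry = list_first_entry(&vm->invalidated,
					 struct amdgpu_vm_bo_base, vm_status);
		spin_unlock(&vm->invalidated_lock);
		/* ... work that may sleep or take other locks ... */
		spin_lock(&vm->invalidated_lock);
	}
	spin_unlock(&vm->invalidated_lock);

	/* Caches only the next pointer, so it tolerates moving the
	 * current entry to another list, but assumes nobody else
	 * modifies the list concurrently -- which holds once the
	 * reservation lock serializes all updates to it.
	 */
	list_for_each_entry_safe(entry, tmp, &vm->evicted, vm_status) {
		/* ... may move entry off the evicted list ... */
	}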

It turned out that protecting the status of each bo_va only with a
spinlock was just hiding problems instead of solving them.

Revert the whole approach, add a separate stats_lock and lockdep
assertions that the correct reservation lock is held all over the place.

While at it also re-order fields in the VM structure and try to improve
the documentation a bit.

v2: re-add missing check

Signed-off-by: Christian König <christian.koe...@amd.com>
---
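
A quick map of the locking scheme this ends up with (summary sketch,
comments mine, not verbatim from amdgpu_vm.h):

	/* protects stats[] only; may be taken from BO move paths where
	 * the reservation lock is not held */
	spinlock_t		stats_lock;

	/* protects the invalidated (and done) lists, which are also
	 * updated without the reservation lock held */
	spinlock_t		invalidated_lock;

	/* the remaining state-machine lists (evicted, evicted_user,
	 * relocated, moved, idle) rely on the root PD reservation
	 * lock, checked via dma_resv_assert_held() in
	 * amdgpu_vm_assert_locked() */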
  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c |   8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 168 +++++++++++-----------
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  25 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |   4 -
  4 files changed, 96 insertions(+), 109 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index cb3432b12221..dd57e034ae1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -619,12 +619,12 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
         struct amdgpu_bo *bo;
         int ret;

-       spin_lock(&vm->status_lock);
+       spin_lock(&vm->invalidated_lock);
         while (!list_empty(&vm->invalidated)) {
                 bo_va = list_first_entry(&vm->invalidated,
                                          struct amdgpu_bo_va,
                                          base.vm_status);
-               spin_unlock(&vm->status_lock);
+               spin_unlock(&vm->invalidated_lock);

                 bo = bo_va->base.bo;
                 ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
@@ -641,9 +641,9 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
                 if (ret)
                         return ret;

-               spin_lock(&vm->status_lock);
+               spin_lock(&vm->invalidated_lock);
         }
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->invalidated_lock);

         return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 31976ef75014..2d5e78a14124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -127,6 +127,17 @@ struct amdgpu_vm_tlb_seq_struct {
         struct dma_fence_cb cb;
  };

+/**
+ * amdgpu_vm_assert_locked - check if VM is correctly locked
+ * @vm: the VM which should be tested
+ *
+ * Asserts that the VM root PD is locked.
+ */
+static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
+{
+       dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+}
+
  /**
   * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
   *
@@ -143,6 +154,8 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  {
         int r;

+       amdgpu_vm_assert_locked(vm);
+
         if (vm->pasid == pasid)
                 return 0;

@@ -181,12 +194,11 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
         struct amdgpu_bo *bo = vm_bo->bo;

         vm_bo->moved = true;
-       spin_lock(&vm_bo->vm->status_lock);
+       amdgpu_vm_assert_locked(vm);
         if (bo->tbo.type == ttm_bo_type_kernel)
                 list_move(&vm_bo->vm_status, &vm->evicted);
         else
                 list_move_tail(&vm_bo->vm_status, &vm->evicted);
-       spin_unlock(&vm_bo->vm->status_lock);
  }
  /**
   * amdgpu_vm_bo_moved - vm_bo is moved
@@ -198,9 +210,8 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
   */
  static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
  {
-       spin_lock(&vm_bo->vm->status_lock);
+       amdgpu_vm_assert_locked(vm_bo->vm);
         list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
-       spin_unlock(&vm_bo->vm->status_lock);
  }

  /**
@@ -213,9 +224,8 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
   */
  static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
  {
-       spin_lock(&vm_bo->vm->status_lock);
+       amdgpu_vm_assert_locked(vm_bo->vm);
         list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
-       spin_unlock(&vm_bo->vm->status_lock);
         vm_bo->moved = false;
  }

@@ -229,9 +239,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
   */
  static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
  {
-       spin_lock(&vm_bo->vm->status_lock);
+       spin_lock(&vm_bo->vm->invalidated_lock);
         list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
-       spin_unlock(&vm_bo->vm->status_lock);
+       spin_unlock(&vm_bo->vm->invalidated_lock);
  }

  /**
@@ -244,10 +254,9 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
   */
  static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
  {
+       amdgpu_vm_assert_locked(vm_bo->vm);
         vm_bo->moved = true;
-       spin_lock(&vm_bo->vm->status_lock);
         list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
-       spin_unlock(&vm_bo->vm->status_lock);
  }

  /**
@@ -260,13 +269,11 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
   */
  static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
  {
-       if (vm_bo->bo->parent) {
-               spin_lock(&vm_bo->vm->status_lock);
+       amdgpu_vm_assert_locked(vm_bo->vm);
+       if (vm_bo->bo->parent)
                 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
-               spin_unlock(&vm_bo->vm->status_lock);
-       } else {
+       else
                 amdgpu_vm_bo_idle(vm_bo);
-       }
  }

  /**
@@ -279,9 +286,8 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
   */
  static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
  {
-       spin_lock(&vm_bo->vm->status_lock);
+       amdgpu_vm_assert_locked(vm_bo->vm);
         list_move(&vm_bo->vm_status, &vm_bo->vm->done);
-       spin_unlock(&vm_bo->vm->status_lock);
  }

  /**
@@ -295,10 +301,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
  {
         struct amdgpu_vm_bo_base *vm_bo, *tmp;

-       spin_lock(&vm->status_lock);
+       spin_lock(&vm->invalidated_lock);
         list_splice_init(&vm->done, &vm->invalidated);
         list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
                 vm_bo->moved = true;
+       spin_unlock(&vm->invalidated_lock);
+
+       amdgpu_vm_assert_locked(vm);
         list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
                 struct amdgpu_bo *bo = vm_bo->bo;

@@ -308,14 +317,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
                 else if (bo->parent)
                         list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
         }
-       spin_unlock(&vm->status_lock);
  }

  /**
   * amdgpu_vm_update_shared - helper to update shared memory stat
   * @base: base structure for tracking BO usage in a VM
   *
- * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * Takes the vm stats_lock and updates the shared memory stat. If the basic
  * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
  * as well.
   */
@@ -327,7 +335,8 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
         uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
         bool shared;

-       spin_lock(&vm->status_lock);
+       dma_resv_assert_held(bo->tbo.base.resv);
+       spin_lock(&vm->stats_lock);
         shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
         if (base->shared != shared) {
                 base->shared = shared;
@@ -339,7 +348,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
                         vm->stats[bo_memtype].drm.private += size;
                 }
         }
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->stats_lock);
  }

  /**
@@ -364,11 +373,11 @@ void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
   *        be bo->tbo.resource
   * @sign: if we should add (+1) or subtract (-1) from the stat
   *
- * Caller need to have the vm status_lock held. Useful for when multiple update
+ * Caller need to have the vm stats_lock held. Useful for when multiple update
   * need to happen at the same time.
   */
  static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
-                           struct ttm_resource *res, int sign)
+                                         struct ttm_resource *res, int sign)
  {
         struct amdgpu_vm *vm = base->vm;
         struct amdgpu_bo *bo = base->bo;
@@ -392,7 +401,8 @@ static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
                  */
                 if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
                         vm->stats[res_memtype].drm.purgeable += size;
-               if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+               if (!(bo->preferred_domains &
+                     amdgpu_mem_type_to_domain(res_memtype)))
                         vm->stats[bo_memtype].evicted += size;
         }
  }
@@ -411,9 +421,9 @@ void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
  {
         struct amdgpu_vm *vm = base->vm;

-       spin_lock(&vm->status_lock);
+       spin_lock(&vm->stats_lock);
         amdgpu_vm_update_stats_locked(base, res, sign);
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->stats_lock);
  }

  /**
@@ -439,10 +449,10 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
         base->next = bo->vm_bo;
         bo->vm_bo = base;

-       spin_lock(&vm->status_lock);
+       spin_lock(&vm->stats_lock);
         base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
         amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->stats_lock);

         if (!amdgpu_vm_is_bo_always_valid(vm, bo))
                 return;
@@ -500,10 +510,10 @@ int amdgpu_vm_lock_done(struct amdgpu_vm *vm, struct drm_exec *exec,
         int ret;

         /* We can only trust prev->next while holding the lock */
-       spin_lock(&vm->status_lock);
+       spin_lock(&vm->invalidated_lock);
         while (!list_is_head(prev->next, &vm->done)) {
                 bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
-               spin_unlock(&vm->status_lock);
+               spin_unlock(&vm->invalidated_lock);

                 bo = bo_va->base.bo;
                 if (bo) {
@@ -511,10 +521,10 @@ int amdgpu_vm_lock_done(struct amdgpu_vm *vm, struct drm_exec *exec,
                         if (unlikely(ret))
                                 return ret;
                 }
-               spin_lock(&vm->status_lock);
+               spin_lock(&vm->invalidated_lock);
                 prev = prev->next;
         }
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->invalidated_lock);

         return 0;
  }
@@ -610,7 +620,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                        void *param)
  {
         uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
-       struct amdgpu_vm_bo_base *bo_base;
+       struct amdgpu_vm_bo_base *bo_base, *tmp;
         struct amdgpu_bo *bo;
         int r;

@@ -623,13 +633,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                         return r;
         }

-       spin_lock(&vm->status_lock);
-       while (!list_empty(&vm->evicted)) {
-               bo_base = list_first_entry(&vm->evicted,
-                                          struct amdgpu_vm_bo_base,
-                                          vm_status);
-               spin_unlock(&vm->status_lock);
-
+       list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
                 bo = bo_base->bo;

                 r = validate(param, bo);
@@ -642,26 +646,21 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                         vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
                         amdgpu_vm_bo_relocated(bo_base);
                 }
-               spin_lock(&vm->status_lock);
         }
-       while (ticket && !list_empty(&vm->evicted_user)) {
-               bo_base = list_first_entry(&vm->evicted_user,
-                                          struct amdgpu_vm_bo_base,
-                                          vm_status);
-               spin_unlock(&vm->status_lock);

-               bo = bo_base->bo;
-               dma_resv_assert_held(bo->tbo.base.resv);
-
-               r = validate(param, bo);
-               if (r)
-                       return r;
+       if (ticket) {
+               list_for_each_entry_safe(bo_base, tmp, &vm->evicted_user,
+                                        vm_status) {
+                       bo = bo_base->bo;
+                       dma_resv_assert_held(bo->tbo.base.resv);

-               amdgpu_vm_bo_invalidated(bo_base);
+                       r = validate(param, bo);
+                       if (r)
+                               return r;

-               spin_lock(&vm->status_lock);
+                       amdgpu_vm_bo_invalidated(bo_base);
+               }
         }
-       spin_unlock(&vm->status_lock);

         amdgpu_vm_eviction_lock(vm);
         vm->evicting = false;
@@ -684,13 +683,13 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  {
         bool ret;

+       amdgpu_vm_assert_locked(vm);
+
         amdgpu_vm_eviction_lock(vm);
         ret = !vm->evicting;
         amdgpu_vm_eviction_unlock(vm);

-       spin_lock(&vm->status_lock);
         ret &= list_empty(&vm->evicted);
-       spin_unlock(&vm->status_lock);

         spin_lock(&vm->immediate.lock);
         ret &= !vm->immediate.stopped;
@@ -981,16 +980,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
                           struct amdgpu_vm *vm, bool immediate)
  {
         struct amdgpu_vm_update_params params;
-       struct amdgpu_vm_bo_base *entry;
+       struct amdgpu_vm_bo_base *entry, *tmp;
         bool flush_tlb_needed = false;
-       LIST_HEAD(relocated);
         int r, idx;

-       spin_lock(&vm->status_lock);
-       list_splice_init(&vm->relocated, &relocated);
-       spin_unlock(&vm->status_lock);
+       amdgpu_vm_assert_locked(vm);

-       if (list_empty(&relocated))
+       if (list_empty(&vm->relocated))
                 return 0;

         if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -1005,7 +1001,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
         if (r)
                 goto error;

-       list_for_each_entry(entry, &relocated, vm_status) {
+       list_for_each_entry(entry, &vm->relocated, vm_status) {
                 /* vm_flush_needed after updating moved PDEs */
                 flush_tlb_needed |= entry->moved;

@@ -1021,9 +1017,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
         if (flush_tlb_needed)
                 atomic64_inc(&vm->tlb_seq);

-       while (!list_empty(&relocated)) {
-               entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
-                                        vm_status);
+       list_for_each_entry_safe(entry, tmp, &vm->relocated, vm_status) {
                 amdgpu_vm_bo_idle(entry);
         }

@@ -1249,9 +1243,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
                           struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
  {
-       spin_lock(&vm->status_lock);
+       spin_lock(&vm->stats_lock);
         memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->stats_lock);
  }

  /**
@@ -1618,29 +1612,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
                            struct amdgpu_vm *vm,
                            struct ww_acquire_ctx *ticket)
  {
-       struct amdgpu_bo_va *bo_va;
+       struct amdgpu_bo_va *bo_va, *tmp;
         struct dma_resv *resv;
         bool clear, unlock;
         int r;

-       spin_lock(&vm->status_lock);
-       while (!list_empty(&vm->moved)) {
-               bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
-                                        base.vm_status);
-               spin_unlock(&vm->status_lock);
-
+       list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
                 /* Per VM BOs never need to bo cleared in the page tables */
                 r = amdgpu_vm_bo_update(adev, bo_va, false);
                 if (r)
                         return r;
-               spin_lock(&vm->status_lock);
         }

+       spin_lock(&vm->invalidated_lock);
         while (!list_empty(&vm->invalidated)) {
                 bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
                                          base.vm_status);

Why is this not converted to list_for_each_entry_safe() like all of
the others above?

Alex


                 resv = bo_va->base.bo->tbo.base.resv;
-               spin_unlock(&vm->status_lock);
+               spin_unlock(&vm->invalidated_lock);

                 /* Try to reserve the BO to avoid clearing its ptes */
                 if (!adev->debug_vm && dma_resv_trylock(resv)) {
@@ -1672,9 +1661,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
                      bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
                         amdgpu_vm_bo_evicted_user(&bo_va->base);

-               spin_lock(&vm->status_lock);
+               spin_lock(&vm->invalidated_lock);
         }
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->invalidated_lock);

         return 0;
  }
@@ -2203,9 +2192,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
                 }
         }

-       spin_lock(&vm->status_lock);
+       spin_lock(&vm->invalidated_lock);
         list_del(&bo_va->base.vm_status);
-       spin_unlock(&vm->status_lock);
+       spin_unlock(&vm->invalidated_lock);

         list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
                 list_del(&mapping->list);
@@ -2313,10 +2302,10 @@ void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
         for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
                 struct amdgpu_vm *vm = bo_base->vm;

-               spin_lock(&vm->status_lock);
+               spin_lock(&vm->stats_lock);
                 amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
                 amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
-               spin_unlock(&vm->status_lock);
+               spin_unlock(&vm->stats_lock);
         }

         amdgpu_vm_bo_invalidate(bo, evicted);
@@ -2583,11 +2572,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
         INIT_LIST_HEAD(&vm->relocated);
         INIT_LIST_HEAD(&vm->moved);
         INIT_LIST_HEAD(&vm->idle);
+       spin_lock_init(&vm->invalidated_lock);
         INIT_LIST_HEAD(&vm->invalidated);
-       spin_lock_init(&vm->status_lock);
         INIT_LIST_HEAD(&vm->freed);
         INIT_LIST_HEAD(&vm->done);
         INIT_KFIFO(vm->faults);
+       spin_lock_init(&vm->stats_lock);

         r = amdgpu_vm_init_entities(adev, vm);
         if (r)
@@ -3052,7 +3042,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
         unsigned int total_done_objs = 0;
         unsigned int id = 0;

-       spin_lock(&vm->status_lock);
+       amdgpu_vm_assert_locked(vm);
+
         seq_puts(m, "\tIdle BOs:\n");
         list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
                 if (!bo_va->base.bo)
@@ -3090,11 +3081,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
         id = 0;

         seq_puts(m, "\tInvalidated BOs:\n");
+       spin_lock(&vm->invalidated_lock);
        list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
                 if (!bo_va->base.bo)
                         continue;
                total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
         }
+       spin_unlock(&vm->invalidated_lock);
         total_invalidated_objs = id;
         id = 0;

@@ -3104,7 +3097,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
                         continue;
                 total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
         }
-       spin_unlock(&vm->status_lock);
         total_done_objs = id;

        seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index f86b1a6afb0c..03219ba110b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -203,11 +203,11 @@ struct amdgpu_vm_bo_base {
         /* protected by bo being reserved */
         struct amdgpu_vm_bo_base        *next;

-       /* protected by vm status_lock */
+       /* protected by vm reservation and invalidated_lock */
         struct list_head                vm_status;

         /* if the bo is counted as shared in mem stats
-        * protected by vm status_lock */
+        * protected by vm BO being reserved */
         bool                            shared;

         /* protected by the BO being reserved */
@@ -343,18 +343,13 @@ struct amdgpu_vm {
         bool                    evicting;
         unsigned int            saved_flags;

-       /* Lock to protect vm_bo add/del/move on all lists of vm */
-       spinlock_t              status_lock;
-
-       /* Memory statistics for this vm, protected by status_lock */
+       /* Memory statistics for this vm, protected by stats_lock */
+       spinlock_t              stats_lock;
         struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];

         /* Per-VM and PT BOs who needs a validation */
         struct list_head        evicted;

-       /* BOs for user mode queues that need a validation */
-       struct list_head        evicted_user;
-
         /* PT BOs which relocated and their parent need an update */
         struct list_head        relocated;

@@ -364,15 +359,19 @@ struct amdgpu_vm {
         /* All BOs of this VM not currently in the state machine */
         struct list_head        idle;

-       /* regular invalidated BOs, but not yet updated in the PT */
+       /* Regular BOs for KFD queues that need a validation */
+       struct list_head        evicted_user;
+
+       /* Regular invalidated BOs, need to be validated and updated in the PT */
+       spinlock_t              invalidated_lock;
         struct list_head        invalidated;

+       /* Regular BOs which are validated and location has been updated in the PTs */
+       struct list_head        done;
+
         /* BO mappings freed, but not yet updated in the PT */
         struct list_head        freed;

-       /* BOs which are invalidated, has been updated in the PTs */
-       struct list_head        done;
-
         /* contains the page directory */
         struct amdgpu_vm_bo_base     root;
         struct dma_fence        *last_update;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 30022123b0bf..f57c48b74274 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -541,9 +541,7 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
         entry->bo->vm_bo = NULL;
         ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);

-       spin_lock(&entry->vm->status_lock);
         list_del(&entry->vm_status);
-       spin_unlock(&entry->vm->status_lock);
         amdgpu_bo_unref(&entry->bo);
  }

@@ -587,7 +585,6 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
         struct amdgpu_vm_pt_cursor seek;
         struct amdgpu_vm_bo_base *entry;

-       spin_lock(&params->vm->status_lock);
        for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
                 if (entry && entry->bo)
                        list_move(&entry->vm_status, &params->tlb_flush_waitlist);
@@ -595,7 +592,6 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,

         /* enter start node now */
         list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
-       spin_unlock(&params->vm->status_lock);
  }

  /**
--
2.43.0

