On Fri, 28 Nov 2025 14:14:15 +0000
Alice Ryhl <[email protected]> wrote:

> When calling drm_gpuvm_bo_obtain_prealloc() in immediate mode, this
> may result in a call to ops->vm_bo_free(vm_bo) while holding the
> GEM's gpuva mutex. This is a problem if ops->vm_bo_free(vm_bo)
> performs any operations that are unsafe in the fence signalling
> critical path, and it turns out that Panthor (the only current user
> of the hook) calls drm_gem_shmem_unpin(), which takes a resv lock
> internally.
> 
> This is both a fence signalling safety violation and a lock
> inversion. To fix it, modify the function to take the GEM's gpuva
> mutex internally, so that the mutex can be unlocked before the
> preallocated vm_bo is freed.
> 
> Note that this change requires drivers to use immediate mode when
> calling drm_gpuvm_bo_obtain_prealloc(), as the function would
> otherwise take the wrong lock.
> 
> Signed-off-by: Alice Ryhl <[email protected]>

Reviewed-by: Boris Brezillon <[email protected]>

Should we add a Fixes tag?
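
Also, for the record, my understanding of the problematic pattern this
removes: in immediate mode, callers had to wrap the call like below
(using the panthor names from the hunk further down), which is where
the inversion and the signalling path violation came from:

	dma_resv_lock(panthor_vm_resv(vm), NULL);
	mutex_lock(&bo->base.base.gpuva.lock);
	/* If a <BO,VM> association already exists, the preallocated
	 * vm_bo is dropped here, so ->vm_bo_free() - and in panthor's
	 * case drm_gem_shmem_unpin(), which takes a resv lock - ran
	 * under gpuva.lock.
	 */
	op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
	mutex_unlock(&bo->base.base.gpuva.lock);
	dma_resv_unlock(panthor_vm_resv(vm));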

> ---
>  drivers/gpu/drm/drm_gpuvm.c           | 58 ++++++++++++++++++++++-------------
>  drivers/gpu/drm/panthor/panthor_mmu.c | 10 ------
>  2 files changed, 37 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
> index 936e6c1a60c16ed5a6898546bf99e23a74f6b58b..f08a5cc1d611f971862c1272987e5ecd6d97c163 100644
> --- a/drivers/gpu/drm/drm_gpuvm.c
> +++ b/drivers/gpu/drm/drm_gpuvm.c
> @@ -1601,14 +1601,37 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm,
>  }
>  EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create);
>  
> +static void
> +drm_gpuvm_bo_destroy_not_in_lists(struct drm_gpuvm_bo *vm_bo)
> +{
> +     struct drm_gpuvm *gpuvm = vm_bo->vm;
> +     const struct drm_gpuvm_ops *ops = gpuvm->ops;
> +     struct drm_gem_object *obj = vm_bo->obj;
> +
> +     if (ops && ops->vm_bo_free)
> +             ops->vm_bo_free(vm_bo);
> +     else
> +             kfree(vm_bo);
> +
> +     drm_gpuvm_put(gpuvm);
> +     drm_gem_object_put(obj);
> +}
> +
> +static void
> +drm_gpuvm_bo_destroy_not_in_lists_kref(struct kref *kref)
> +{
> +     struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo,
> +                                               kref);
> +
> +     drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
> +}
> +
>  static void
>  drm_gpuvm_bo_destroy(struct kref *kref)
>  {
>       struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo,
>                                                 kref);
>       struct drm_gpuvm *gpuvm = vm_bo->vm;
> -     const struct drm_gpuvm_ops *ops = gpuvm->ops;
> -     struct drm_gem_object *obj = vm_bo->obj;
>       bool lock = !drm_gpuvm_resv_protected(gpuvm);
>  
>       if (!lock)
> @@ -1617,16 +1640,10 @@ drm_gpuvm_bo_destroy(struct kref *kref)
>       drm_gpuvm_bo_list_del(vm_bo, extobj, lock);
>       drm_gpuvm_bo_list_del(vm_bo, evict, lock);
>  
> -     drm_gem_gpuva_assert_lock_held(gpuvm, obj);
> +     drm_gem_gpuva_assert_lock_held(gpuvm, vm_bo->obj);
>       list_del(&vm_bo->list.entry.gem);
>  
> -     if (ops && ops->vm_bo_free)
> -             ops->vm_bo_free(vm_bo);
> -     else
> -             kfree(vm_bo);
> -
> -     drm_gpuvm_put(gpuvm);
> -     drm_gem_object_put(obj);
> +     drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
>  }
>  
>  /**
> @@ -1744,9 +1761,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put_deferred);
>  void
>  drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm)
>  {
> -     const struct drm_gpuvm_ops *ops = gpuvm->ops;
>       struct drm_gpuvm_bo *vm_bo;
> -     struct drm_gem_object *obj;
>       struct llist_node *bo_defer;
>  
>       bo_defer = llist_del_all(&gpuvm->bo_defer);
> @@ -1765,14 +1780,7 @@ drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm)
>       while (bo_defer) {
>               vm_bo = llist_entry(bo_defer, struct drm_gpuvm_bo, list.entry.bo_defer);
>               bo_defer = bo_defer->next;
> -             obj = vm_bo->obj;
> -             if (ops && ops->vm_bo_free)
> -                     ops->vm_bo_free(vm_bo);
> -             else
> -                     kfree(vm_bo);
> -
> -             drm_gpuvm_put(gpuvm);
> -             drm_gem_object_put(obj);
> +             drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
>       }
>  }
>  EXPORT_SYMBOL_GPL(drm_gpuvm_bo_deferred_cleanup);
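
Nice deduplication, the three free paths now funnel into one helper.
The deferred pair this hunk touches is meant to be used roughly like
this (my sketch, assuming immediate mode):

	/* From the fence signalling critical path: only queue the
	 * release, nothing is freed here.
	 */
	drm_gpuvm_bo_put_deferred(vm_bo);

	/* Later, from process context: free everything queued. */
	drm_gpuvm_bo_deferred_cleanup(gpuvm);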
> @@ -1860,6 +1868,9 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain);
>   * count is decreased. If not found @__vm_bo is returned without further
>   * increase of the reference count.
>   *
> + * The provided @__vm_bo must not already be in the gpuva, evict, or extobj
> + * lists prior to calling this method.
> + *
>   * A new &drm_gpuvm_bo is added to the GEMs gpuva list.
>   *
>   * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing
> @@ -1872,14 +1883,19 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo)
>       struct drm_gem_object *obj = __vm_bo->obj;
>       struct drm_gpuvm_bo *vm_bo;
>  
> +     drm_WARN_ON(gpuvm->drm, !drm_gpuvm_immediate_mode(gpuvm));
> +
> +     mutex_lock(&obj->gpuva.lock);
>       vm_bo = drm_gpuvm_bo_find(gpuvm, obj);
>       if (vm_bo) {
> -             drm_gpuvm_bo_put(__vm_bo);
> +             mutex_unlock(&obj->gpuva.lock);
> +             kref_put(&__vm_bo->kref, drm_gpuvm_bo_destroy_not_in_lists_kref);
>               return vm_bo;
>       }
>  
>       drm_gem_gpuva_assert_lock_held(gpuvm, obj);
>       list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list);
> +     mutex_unlock(&obj->gpuva.lock);
>  
>       return __vm_bo;
>  }
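
With this change, an immediate-mode caller boils down to (sketch, same
names as in my snippet above; the function now takes and drops
gpuva.lock itself):

	/* No resv or gpuva locking around the call anymore. If an
	 * existing vm_bo is found, the preallocated one is freed only
	 * after gpuva.lock has been dropped, so a ->vm_bo_free() like
	 * panthor's can safely take a resv lock.
	 */
	op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);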
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 9f5f4ddf291024121f3fd5644f2fdeba354fa67c..be8811a70e1a3adec87ca4a85cad7c838f54bebf 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1224,17 +1224,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
>               goto err_cleanup;
>       }
>  
> -     /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our
> -      * pre-allocated BO if the <BO,VM> association exists. Given we
> -      * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will
> -      * be called immediately, and we have to hold the VM resv lock when
> -      * calling this function.
> -      */
> -     dma_resv_lock(panthor_vm_resv(vm), NULL);
> -     mutex_lock(&bo->base.base.gpuva.lock);
>       op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
> -     mutex_unlock(&bo->base.base.gpuva.lock);
> -     dma_resv_unlock(panthor_vm_resv(vm));
>  
>       op_ctx->map.bo_offset = offset;
>  
> 
