On Fri, 28 Nov 2025 14:14:15 +0000 Alice Ryhl <[email protected]> wrote:
> When calling drm_gpuvm_bo_obtain_prealloc() and using immediate mode, > this may result in a call to ops->vm_bo_free(vm_bo) while holding the > GEMs gpuva mutex. This is a problem if ops->vm_bo_free(vm_bo) performs > any operations that are not safe in the fence signalling critical path, > and it turns out that Panthor (the only current user of the method) > calls drm_gem_shmem_unpin() which takes a resv lock internally. > > This constitutes both a violation of signalling safety and lock > inversion. To fix this, we modify the method to internally take the GEMs > gpuva mutex so that the mutex can be unlocked before freeing the > preallocated vm_bo. > > Note that this modification introduces a requirement that the driver > uses immediate mode to call drm_gpuvm_bo_obtain_prealloc() as it would > otherwise take the wrong lock. > > Signed-off-by: Alice Ryhl <[email protected]> Reviewed-by: Boris Brezillon <[email protected]> Should we add a Fixes tag? > --- > drivers/gpu/drm/drm_gpuvm.c | 58 > ++++++++++++++++++++++------------- > drivers/gpu/drm/panthor/panthor_mmu.c | 10 ------ > 2 files changed, 37 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c > index > 936e6c1a60c16ed5a6898546bf99e23a74f6b58b..f08a5cc1d611f971862c1272987e5ecd6d97c163 > 100644 > --- a/drivers/gpu/drm/drm_gpuvm.c > +++ b/drivers/gpu/drm/drm_gpuvm.c > @@ -1601,14 +1601,37 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, > } > EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); > > +static void > +drm_gpuvm_bo_destroy_not_in_lists(struct drm_gpuvm_bo *vm_bo) > +{ > + struct drm_gpuvm *gpuvm = vm_bo->vm; > + const struct drm_gpuvm_ops *ops = gpuvm->ops; > + struct drm_gem_object *obj = vm_bo->obj; > + > + if (ops && ops->vm_bo_free) > + ops->vm_bo_free(vm_bo); > + else > + kfree(vm_bo); > + > + drm_gpuvm_put(gpuvm); > + drm_gem_object_put(obj); > +} > + > +static void > +drm_gpuvm_bo_destroy_not_in_lists_kref(struct kref *kref) > +{ > + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, > + kref); > + > + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); > +} > + > static void > drm_gpuvm_bo_destroy(struct kref *kref) > { > struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, > kref); > struct drm_gpuvm *gpuvm = vm_bo->vm; > - const struct drm_gpuvm_ops *ops = gpuvm->ops; > - struct drm_gem_object *obj = vm_bo->obj; > bool lock = !drm_gpuvm_resv_protected(gpuvm); > > if (!lock) > @@ -1617,16 +1640,10 @@ drm_gpuvm_bo_destroy(struct kref *kref) > drm_gpuvm_bo_list_del(vm_bo, extobj, lock); > drm_gpuvm_bo_list_del(vm_bo, evict, lock); > > - drm_gem_gpuva_assert_lock_held(gpuvm, obj); > + drm_gem_gpuva_assert_lock_held(gpuvm, vm_bo->obj); > list_del(&vm_bo->list.entry.gem); > > - if (ops && ops->vm_bo_free) > - ops->vm_bo_free(vm_bo); > - else > - kfree(vm_bo); > - > - drm_gpuvm_put(gpuvm); > - drm_gem_object_put(obj); > + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); > } > > /** > @@ -1744,9 +1761,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put_deferred); > void > drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm) > { > - const struct drm_gpuvm_ops *ops = gpuvm->ops; > struct drm_gpuvm_bo *vm_bo; > - struct drm_gem_object *obj; > struct llist_node *bo_defer; > > bo_defer = llist_del_all(&gpuvm->bo_defer); > @@ -1765,14 +1780,7 @@ drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm) > while (bo_defer) { > vm_bo = llist_entry(bo_defer, struct drm_gpuvm_bo, > list.entry.bo_defer); > bo_defer = bo_defer->next; > - obj = vm_bo->obj; > - if (ops && ops->vm_bo_free) > - ops->vm_bo_free(vm_bo); > - else > - kfree(vm_bo); > - > - drm_gpuvm_put(gpuvm); > - drm_gem_object_put(obj); > + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); > } > } > EXPORT_SYMBOL_GPL(drm_gpuvm_bo_deferred_cleanup); > @@ -1860,6 +1868,9 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); > * count is decreased. If not found @__vm_bo is returned without further > * increase of the reference count. > * > + * The provided @__vm_bo must not already be in the gpuva, evict, or extobj > + * lists prior to calling this method. > + * > * A new &drm_gpuvm_bo is added to the GEMs gpuva list. > * > * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing > @@ -1872,14 +1883,19 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo > *__vm_bo) > struct drm_gem_object *obj = __vm_bo->obj; > struct drm_gpuvm_bo *vm_bo; > > + drm_WARN_ON(gpuvm->drm, !drm_gpuvm_immediate_mode(gpuvm)); > + > + mutex_lock(&obj->gpuva.lock); > vm_bo = drm_gpuvm_bo_find(gpuvm, obj); > if (vm_bo) { > - drm_gpuvm_bo_put(__vm_bo); > + mutex_unlock(&obj->gpuva.lock); > + kref_put(&__vm_bo->kref, > drm_gpuvm_bo_destroy_not_in_lists_kref); > return vm_bo; > } > > drm_gem_gpuva_assert_lock_held(gpuvm, obj); > list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); > + mutex_unlock(&obj->gpuva.lock); > > return __vm_bo; > } > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c > b/drivers/gpu/drm/panthor/panthor_mmu.c > index > 9f5f4ddf291024121f3fd5644f2fdeba354fa67c..be8811a70e1a3adec87ca4a85cad7c838f54bebf > 100644 > --- a/drivers/gpu/drm/panthor/panthor_mmu.c > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c > @@ -1224,17 +1224,7 @@ static int panthor_vm_prepare_map_op_ctx(struct > panthor_vm_op_ctx *op_ctx, > goto err_cleanup; > } > > - /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our > - * pre-allocated BO if the <BO,VM> association exists. Given we > - * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will > - * be called immediately, and we have to hold the VM resv lock when > - * calling this function. > - */ > - dma_resv_lock(panthor_vm_resv(vm), NULL); > - mutex_lock(&bo->base.base.gpuva.lock); > op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); > - mutex_unlock(&bo->base.base.gpuva.lock); > - dma_resv_unlock(panthor_vm_resv(vm)); > > op_ctx->map.bo_offset = offset; > >
