On 12/11/25 17:12, Tvrtko Ursulin wrote:
>
> On 11/12/2025 13:16, Christian König wrote:
>> This allows amdgpu_userq_fences to outlive the amdgpu module.
>>
>> Signed-off-by: Christian König <[email protected]>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +----
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 54 ++++---------------
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 8 ---
>> 3 files changed, 11 insertions(+), 64 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 2dfbddcef9ab..f206297aae8b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -3155,11 +3155,7 @@ static int __init amdgpu_init(void)
>> r = amdgpu_sync_init();
>> if (r)
>> - goto error_sync;
>> -
>> - r = amdgpu_userq_fence_slab_init();
>> - if (r)
>> - goto error_fence;
>> + return r;
>> DRM_INFO("amdgpu kernel modesetting enabled.\n");
>> amdgpu_register_atpx_handler();
>> @@ -3176,12 +3172,6 @@ static int __init amdgpu_init(void)
>> /* let modprobe override vga console setting */
>> return pci_register_driver(&amdgpu_kms_pci_driver);
>> -
>> -error_fence:
>> - amdgpu_sync_fini();
>> -
>> -error_sync:
>> - return r;
>> }
>> static void __exit amdgpu_exit(void)
>> @@ -3191,7 +3181,6 @@ static void __exit amdgpu_exit(void)
>> amdgpu_unregister_atpx_handler();
>> amdgpu_acpi_release();
>> amdgpu_sync_fini();
>> - amdgpu_userq_fence_slab_fini();
>> mmu_notifier_synchronize();
>> amdgpu_xcp_drv_release();
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> index eba9fb359047..bb19f72770b0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> @@ -33,26 +33,6 @@
>> #include "amdgpu_userq_fence.h"
>> static const struct dma_fence_ops amdgpu_userq_fence_ops;
>> -static struct kmem_cache *amdgpu_userq_fence_slab;
>> -
>> -int amdgpu_userq_fence_slab_init(void)
>> -{
>> - amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
>> - sizeof(struct amdgpu_userq_fence),
>> - 0,
>> - SLAB_HWCACHE_ALIGN,
>> - NULL);
>> - if (!amdgpu_userq_fence_slab)
>> - return -ENOMEM;
>> -
>> - return 0;
>> -}
>> -
>> -void amdgpu_userq_fence_slab_fini(void)
>> -{
>> - rcu_barrier();
>
> What was this rcu_barrier() for? Cargo culted or more to it?
All dma_fences are RCU protected. When they are backed by a kmem_cache you need
to make sure to wait for an RCU grace period to pass before destroying the
kmem_cache.
Since the dma_fence framework now uses kfree_rcu that shouldn't be problematic
any more.
>> - kmem_cache_destroy(amdgpu_userq_fence_slab);
>> -}
>> static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct
>> dma_fence *f)
>> {
>> @@ -227,7 +207,7 @@ void amdgpu_userq_fence_driver_put(struct
>> amdgpu_userq_fence_driver *fence_drv)
>> static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence
>> **userq_fence)
>> {
>> - *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
>> + *userq_fence = kmalloc(sizeof(**userq_fence), GFP_ATOMIC);
> This GFP_ATOMIC is suboptimal for sure being on the ioctl path. It is outside
> of the scope for this patch, but once my userq cleanup patches get reviewed
> next on my list was to try and understand this.
>> return *userq_fence ? 0 : -ENOMEM;
>> }
>> @@ -243,12 +223,11 @@ static int amdgpu_userq_fence_create(struct
>> amdgpu_usermode_queue *userq,
>> if (!fence_drv)
>> return -EINVAL;
>> - spin_lock_init(&userq_fence->lock);
>> INIT_LIST_HEAD(&userq_fence->link);
>> fence = &userq_fence->base;
>> userq_fence->fence_drv = fence_drv;
>> - dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
>> + dma_fence_init64(fence, &amdgpu_userq_fence_ops, NULL,
>> fence_drv->context, seq);
>> amdgpu_userq_fence_driver_get(fence_drv);
>> @@ -318,35 +297,22 @@ static bool amdgpu_userq_fence_signaled(struct
>> dma_fence *f)
>> rptr = amdgpu_userq_fence_read(fence_drv);
>> wptr = fence->base.seqno;
>> - if (rptr >= wptr)
>> + if (rptr >= wptr) {
>> + amdgpu_userq_fence_driver_put(fence->fence_drv);
>
> fence_drv is in a local already.
>
>> + fence->fence_drv = NULL;
>
> amdgpu_userq_fence_get_timeline_name could now oops somehow?
>
>> +
>> + kvfree(fence->fence_drv_array);
>> + fence->fence_drv_array = NULL;
>
> Not sure if this is safe either. amdgpu_userq_fence_driver_process() drops
> its reference before it unlinks the fence from the list. Can someone external
> trigger the fence_is_signaled check, before the interrupt processing kicks
> in, which will clear fence_drv_array, and so
> amdgpu_userq_fence_driver_process() would oops?
Oh, good question. I need to double check that.
Thanks,
Christian.
>
> Regards,
>
> Tvrtko
>
>> return true;
>> + }
>> return false;
>> }
>> -static void amdgpu_userq_fence_free(struct rcu_head *rcu)
>> -{
>> - struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
>> - struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
>> - struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
>> -
>> - /* Release the fence driver reference */
>> - amdgpu_userq_fence_driver_put(fence_drv);
>> -
>> - kvfree(userq_fence->fence_drv_array);
>> - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>> -}
>> -
>> -static void amdgpu_userq_fence_release(struct dma_fence *f)
>> -{
>> - call_rcu(&f->rcu, amdgpu_userq_fence_free);
>> -}
>> -
>> static const struct dma_fence_ops amdgpu_userq_fence_ops = {
>> .get_driver_name = amdgpu_userq_fence_get_driver_name,
>> .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
>> .signaled = amdgpu_userq_fence_signaled,
>> - .release = amdgpu_userq_fence_release,
>> };
>> /**
>> @@ -560,7 +526,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev,
>> void *data,
>> r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
>> if (r) {
>> mutex_unlock(&userq_mgr->userq_mutex);
>> - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>> + kfree(userq_fence);
>> goto put_gobj_write;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> index d76add2afc77..6f04782f3ea9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> @@ -31,11 +31,6 @@
>> struct amdgpu_userq_fence {
>> struct dma_fence base;
>> - /*
>> - * This lock is necessary to synchronize the
>> - * userqueue dma fence operations.
>> - */
>> - spinlock_t lock;
>> struct list_head link;
>> unsigned long fence_drv_array_count;
>> struct amdgpu_userq_fence_driver *fence_drv;
>> @@ -58,9 +53,6 @@ struct amdgpu_userq_fence_driver {
>> char timeline_name[TASK_COMM_LEN];
>> };
>> -int amdgpu_userq_fence_slab_init(void);
>> -void amdgpu_userq_fence_slab_fini(void);
>> -
>> void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver
>> *fence_drv);
>> void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver
>> *fence_drv);
>> int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
>