On 10/18/25 00:22, Felix Kuehling wrote:
> 
> On 2025-10-13 09:48, Christian König wrote:
>> This should allow amdkfd_fences to outlive the amdgpu module.
>>
>> Signed-off-by: Christian König <[email protected]>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  6 ++++
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  | 36 +++++++------------
>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  7 ++--
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  4 +--
>>   4 files changed, 24 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 9e120c934cc1..35c59c784b7b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void 
>> *data);
>>   #endif
>>   #if IS_ENABLED(CONFIG_HSA_AMD)
>>   bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
>> +void amdkfd_fence_signal(struct dma_fence *f);
>>   struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
>>   void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
>>   int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
>> @@ -210,6 +211,11 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct 
>> mm_struct *mm)
>>       return false;
>>   }
>>   +static inline
>> +void amdkfd_fence_signal(struct dma_fence *f)
>> +{
>> +}
>> +
>>   static inline
>>   struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> index 09c919f72b6c..69bca4536326 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct 
>> dma_fence *f)
>>           if (!svm_range_schedule_evict_svm_bo(fence))
>>               return true;
>>       }
>> -    return false;
>> -}
>> -
>> -/**
>> - * amdkfd_fence_release - callback that fence can be freed
>> - *
>> - * @f: dma_fence
>> - *
>> - * This function is called when the reference count becomes zero.
>> - * Drops the mm_struct reference and RCU schedules freeing up the fence.
>> - */
>> -static void amdkfd_fence_release(struct dma_fence *f)
>> -{
>> -    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> -
>> -    /* Unconditionally signal the fence. The process is getting
>> -     * terminated.
>> -     */
>> -    if (WARN_ON(!fence))
>> -        return; /* Not an amdgpu_amdkfd_fence */
>> -
>>       mmdrop(fence->mm);
>> -    kfree_rcu(f, rcu);
>> +    fence->mm = NULL;
>> +    return false;
>>   }
>>     /**
>> @@ -174,9 +154,19 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct 
>> mm_struct *mm)
>>       return false;
>>   }
>>   +void amdkfd_fence_signal(struct dma_fence *f)
>> +{
>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> +
>> +    if (fence) {
>> +        mmdrop(fence->mm);
>> +        fence->mm = NULL;
> 
> Isn't fence->mm already NULL here if it was dropped in 
> amdkfd_fence_enable_signaling?

It looked like there are some use cases which signal the fence without going 
through amdkfd_fence_enable_signaling.

E.g. kfd_process_wq_release, which is most likely used on process teardown.

Regards,
Christian.

> 
> Regards,
>   Felix
> 
> 
>> +    }
>> +    dma_fence_signal(f);
>> +}
>> +
>>   static const struct dma_fence_ops amdkfd_fence_ops = {
>>       .get_driver_name = amdkfd_fence_get_driver_name,
>>       .get_timeline_name = amdkfd_fence_get_timeline_name,
>>       .enable_signaling = amdkfd_fence_enable_signaling,
>> -    .release = amdkfd_fence_release,
>>   };
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index ddfe30c13e9d..779d7701bac9 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -1177,7 +1177,7 @@ static void kfd_process_wq_release(struct work_struct 
>> *work)
>>       synchronize_rcu();
>>       ef = rcu_access_pointer(p->ef);
>>       if (ef)
>> -        dma_fence_signal(ef);
>> +        amdkfd_fence_signal(ef);
>>         kfd_process_remove_sysfs(p);
>>       kfd_debugfs_remove_process(p);
>> @@ -1986,7 +1986,6 @@ kfd_process_gpuid_from_node(struct kfd_process *p, 
>> struct kfd_node *node,
>>   static int signal_eviction_fence(struct kfd_process *p)
>>   {
>>       struct dma_fence *ef;
>> -    int ret;
>>         rcu_read_lock();
>>       ef = dma_fence_get_rcu_safe(&p->ef);
>> @@ -1994,10 +1993,10 @@ static int signal_eviction_fence(struct kfd_process 
>> *p)
>>       if (!ef)
>>           return -EINVAL;
>>   -    ret = dma_fence_signal(ef);
>> +    amdkfd_fence_signal(ef);
>>       dma_fence_put(ef);
>>   -    return ret;
>> +    return 0;
>>   }
>>     static void evict_process_worker(struct work_struct *work)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 91609dd5730f..01ce2d853602 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
>>         if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>>           /* We're not in the eviction worker. Signal the fence. */
>> -        dma_fence_signal(&svm_bo->eviction_fence->base);
>> +        amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>>       dma_fence_put(&svm_bo->eviction_fence->base);
>>       amdgpu_bo_unref(&svm_bo->bo);
>>       kfree(svm_bo);
>> @@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct 
>> work_struct *work)
>>       mmap_read_unlock(mm);
>>       mmput(mm);
>>   -    dma_fence_signal(&svm_bo->eviction_fence->base);
>> +    amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>>         /* This is the last reference to svm_bo, after 
>> svm_range_vram_node_free
>>        * has been called in svm_migrate_vram_to_ram

Reply via email to