This allows amdkfd_fences to outlive the amdgpu module.

v2: implement Felix's suggestion to lock the fence while signaling it.
v3: fix typos
v4: fix return code in signal_eviction_fence

Signed-off-by: Christian König <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  7 +++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  | 44 +++++++++----------
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  4 +-
 4 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8bdfcde2029b..2f2b277cfaed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
 #endif
 #if IS_ENABLED(CONFIG_HSA_AMD)
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+bool amdkfd_fence_signal(struct dma_fence *f);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
 void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
 int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
@@ -210,6 +211,12 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
        return false;
 }
 
+static inline
+bool amdkfd_fence_signal(struct dma_fence *f)
+{
+       return false;
+}
+
 static inline
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 09c919f72b6c..9cd413e325f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
                if (!svm_range_schedule_evict_svm_bo(fence))
                        return true;
        }
-       return false;
-}
-
-/**
- * amdkfd_fence_release - callback that fence can be freed
- *
- * @f: dma_fence
- *
- * This function is called when the reference count becomes zero.
- * Drops the mm_struct reference and RCU schedules freeing up the fence.
- */
-static void amdkfd_fence_release(struct dma_fence *f)
-{
-       struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
-
-       /* Unconditionally signal the fence. The process is getting
-        * terminated.
-        */
-       if (WARN_ON(!fence))
-               return; /* Not an amdgpu_amdkfd_fence */
-
        mmdrop(fence->mm);
-       kfree_rcu(f, rcu);
+       fence->mm = NULL;
+       return false;
 }
 
 /**
@@ -174,9 +154,27 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
        return false;
 }
 
+bool amdkfd_fence_signal(struct dma_fence *f)
+{
+       struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+       unsigned long flags;
+       bool was_signaled;
+
+       dma_fence_lock_irqsave(f, flags);
+       if (fence->mm) {
+               mmdrop(fence->mm);
+               fence->mm = NULL;
+       }
+       was_signaled = dma_fence_is_signaled_locked(f);
+       if (!was_signaled)
+               dma_fence_signal_locked(f);
+       dma_fence_unlock_irqrestore(f, flags);
+
+       return was_signaled;
+}
+
 static const struct dma_fence_ops amdkfd_fence_ops = {
        .get_driver_name = amdkfd_fence_get_driver_name,
        .get_timeline_name = amdkfd_fence_get_timeline_name,
        .enable_signaling = amdkfd_fence_enable_signaling,
-       .release = amdkfd_fence_release,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index bb252ec43733..2cf39e3d3fae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1173,7 +1173,7 @@ static void kfd_process_wq_release(struct work_struct *work)
        synchronize_rcu();
        ef = rcu_access_pointer(p->ef);
        if (ef)
-               dma_fence_signal(ef);
+               amdkfd_fence_signal(ef);
 
        kfd_process_remove_sysfs(p);
        kfd_debugfs_remove_process(p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 97c2270f278f..0e94f3a976b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
 
        if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
                /* We're not in the eviction worker. Signal the fence. */
-               dma_fence_signal(&svm_bo->eviction_fence->base);
+               amdkfd_fence_signal(&svm_bo->eviction_fence->base);
        dma_fence_put(&svm_bo->eviction_fence->base);
        amdgpu_bo_unref(&svm_bo->bo);
        kfree(svm_bo);
@@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
        mmap_read_unlock(mm);
        mmput(mm);
 
-       dma_fence_signal(&svm_bo->eviction_fence->base);
+       amdkfd_fence_signal(&svm_bo->eviction_fence->base);
 
        /* This is the last reference to svm_bo, after svm_range_vram_node_free
         * has been called in svm_migrate_vram_to_ram
-- 
2.43.0

Reply via email to