From: Mukul Joshi <[email protected]>

Update interrupt handling in CPX mode for GFX9.4.3 by using the
VMID space instead of SDMA client id to determine if an interrupt
should be processed by a KFD node. This is especially needed for
handling retry faults from MMHUB.

Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Felix Kuehling <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  7 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  | 16 ++++++----------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   |  8 ++++----
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h   |  2 +-
 6 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a5064d95e7f5..e9b4b3c68b1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2431,6 +2431,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * amdgpu_vm_handle_fault - graceful handling of VM faults.
  * @adev: amdgpu device pointer
  * @pasid: PASID of the VM
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: Node_id received in IH cookie. Only applicable for
+ *           GFX 9.4.3.
  * @addr: Address of the fault
  * @write_fault: true is write fault, false is read fault
  *
@@ -2438,7 +2441,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * shouldn't be reported any more.
  */
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-                           u32 client_id, u32 node_id, uint64_t addr,
+                           u32 vmid, u32 node_id, uint64_t addr,
                            bool write_fault)
 {
        bool is_compute_context = false;
@@ -2463,7 +2466,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 
        addr /= AMDGPU_GPU_PAGE_SIZE;
 
-       if (is_compute_context && !svm_range_restore_pages(adev, pasid, client_id,
+       if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
            node_id, addr, write_fault)) {
                amdgpu_bo_unref(&root);
                return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 721bc55bfafa..14864a8541ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -455,7 +455,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
                             struct amdgpu_task_info *task_info);
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-                           u32 client_id, u32 node_id, uint64_t addr,
+                           u32 vmid, u32 node_id, uint64_t addr,
                            bool write_fault);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 5c209f2d38c6..d819b544b043 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -593,8 +593,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
                /* Try to handle the recoverable page faults by filling page
                 * tables
                 */
-               if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id,
-                                          addr, write_fault))
+               if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid,
+                                          node_id, addr, write_fault))
                        return 1;
        }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index df372de6b056..fb3cf2c51da8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1073,18 +1073,14 @@ struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
 struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev);
 struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev);
-static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t client_id,
-                                    uint32_t node_id)
+static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
+                                       uint32_t vmid)
 {
-       if ((node->interrupt_bitmap & (0x1U << node_id)) ||
-           ((node_id % 4) == 0 &&
-           (node->interrupt_bitmap >> 16) & (0x1U << client_id)))
-               return true;
-
-       return false;
+       return (node->interrupt_bitmap & (1 << node_id)) != 0 &&
+              (node->compute_vmid_bitmap & (1 << vmid)) != 0;
 }
 static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
-                                       uint32_t client_id, uint32_t node_id) {
+                                       uint32_t node_id, uint32_t vmid) {
        struct kfd_dev *dev = adev->kfd.dev;
        uint32_t i;
 
@@ -1092,7 +1088,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
                return dev->nodes[0];
 
        for (i = 0; i < dev->num_nodes; i++)
-               if (kfd_irq_is_from_node(dev->nodes[i], client_id, node_id))
+               if (kfd_irq_is_from_node(dev->nodes[i], node_id, vmid))
                        return dev->nodes[i];
 
        return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0e2b21ec468c..8bd9d88655b8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2788,7 +2788,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
-                       uint32_t client_id, uint32_t node_id,
+                       uint32_t vmid, uint32_t node_id,
                        uint64_t addr, bool write_fault)
 {
        struct mm_struct *mm = NULL;
@@ -2840,10 +2840,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
                goto out;
        }
 
-       node = kfd_node_by_irq_ids(adev, node_id, client_id);
+       node = kfd_node_by_irq_ids(adev, node_id, vmid);
        if (!node) {
-               pr_debug("kfd node does not exist node_id: %d, client_id: %d\n", node_id,
-                        client_id);
+               pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+                        vmid);
                r = -EFAULT;
                goto out;
        }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index a165c73b40b2..5116786718b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -173,7 +173,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
                               unsigned long addr, struct svm_range *parent,
                               struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
-                           uint32_t client_id, uint32_t node_id, uint64_t addr,
+                           uint32_t vmid, uint32_t node_id, uint64_t addr,
                            bool write_fault);
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
 void svm_range_add_list_work(struct svm_range_list *svms,
-- 
2.39.2

Reply via email to