On 2023-07-11 09:31, Christian König wrote:
Avoids quite a bit of logic and kmalloc overhead.

v2: fix multiple problems pointed out by Felix

Signed-off-by: Christian König <[email protected]>

Two nit-picks inline about DRM_EXEC_INTERRUPTIBLE_WAIT. With those fixed, the patch is

Reviewed-by: Felix Kuehling <[email protected]>



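For context: the drm_exec conversion replaces the ttm_eu_reserve_buffers()/amdgpu_bo_list_entry bookkeeping with a retry loop, which is where the logic and kmalloc savings come from. A minimal sketch of the idiom, using only calls that appear in the diff below (gobj and the error label are stand-ins for whatever the caller needs):

        struct drm_exec exec;
        int ret;

        drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
        drm_exec_until_all_locked(&exec) {
                /* (Re)take the lock on each needed object; on contention
                 * the helper drops all locks taken so far and restarts
                 * the whole block. */
                ret = drm_exec_prepare_obj(&exec, gobj, 1);
                drm_exec_retry_on_contention(&exec);
                if (unlikely(ret))
                        goto error;
        }
        /* ... work with the locked objects ... */
        drm_exec_fini(&exec);   /* drops all locks */
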
---
  drivers/gpu/drm/amd/amdgpu/Kconfig            |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   5 +-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 299 +++++++-----------
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  18 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   4 +
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  45 ++-
  6 files changed, 162 insertions(+), 210 deletions(-)
[snip]
@@ -2538,50 +2489,41 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
   */
  static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
  {
-       struct amdgpu_bo_list_entry *pd_bo_list_entries;
-       struct list_head resv_list, duplicates;
-       struct ww_acquire_ctx ticket;
+       struct ttm_operation_ctx ctx = { false, false };
        struct amdgpu_sync sync;
+       struct drm_exec exec;
        struct amdgpu_vm *peer_vm;
        struct kgd_mem *mem, *tmp_mem;
        struct amdgpu_bo *bo;
-       struct ttm_operation_ctx ctx = { false, false };
-       int i, ret;
-
-       pd_bo_list_entries = kcalloc(process_info->n_vms,
-                                    sizeof(struct amdgpu_bo_list_entry),
-                                    GFP_KERNEL);
-       if (!pd_bo_list_entries) {
-               pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
-               ret = -ENOMEM;
-               goto out_no_mem;
-       }
-
-       INIT_LIST_HEAD(&resv_list);
-       INIT_LIST_HEAD(&duplicates);
+       int ret;
-       /* Get all the page directory BOs that need to be reserved */
-       i = 0;
-       list_for_each_entry(peer_vm, &process_info->vm_list_head,
-                           vm_list_node)
-               amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
-                                   &pd_bo_list_entries[i++]);
-       /* Add the userptr_inval_list entries to resv_list */
-       list_for_each_entry(mem, &process_info->userptr_inval_list,
-                           validate_list.head) {
-               list_add_tail(&mem->resv_list.head, &resv_list);
-               mem->resv_list.bo = mem->validate_list.bo;
-               mem->resv_list.num_shared = mem->validate_list.num_shared;
-       }
+       amdgpu_sync_create(&sync);
+       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

This runs in a worker thread, so I think it doesn't need to be interruptible.
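
Something like this should be all that's needed (untested sketch; a flags value of 0 simply selects plain, uninterruptible waits):

        drm_exec_init(&exec, 0);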


        /* Reserve all BOs and page tables for validation */
-       ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
-       WARN(!list_empty(&duplicates), "Duplicates should be empty");
-       if (ret)
-               goto out_free;
+       drm_exec_until_all_locked(&exec) {
+               /* Reserve all the page directories */
+               list_for_each_entry(peer_vm, &process_info->vm_list_head,
+                                   vm_list_node) {
+                       ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unreserve_out;
+               }
-       amdgpu_sync_create(&sync);
+               /* Reserve the userptr_inval_list entries to resv_list */
+               list_for_each_entry(mem, &process_info->userptr_inval_list,
+                                   validate_list) {
+                       struct drm_gem_object *gobj;
+
+                       gobj = &mem->bo->tbo.base;
+                       ret = drm_exec_prepare_obj(&exec, gobj, 1);
+                       drm_exec_retry_on_contention(&exec);
+                       if (unlikely(ret))
+                               goto unreserve_out;
+               }
+       }
        ret = process_validate_vms(process_info);
        if (ret)

[snip]

@@ -1467,25 +1467,24 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
        uint32_t gpuidx;
        int r;
-       INIT_LIST_HEAD(&ctx->validate_list);
-       for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
-               pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
-               if (!pdd) {
-                       pr_debug("failed to find device idx %d\n", gpuidx);
-                       return -EINVAL;
-               }
-               vm = drm_priv_to_vm(pdd->drm_priv);
-
-               ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
-               ctx->tv[gpuidx].num_shared = 4;
-               list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
-       }
+       drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

This function is only called from svm_range_validate_and_map, which has an "intr" parameter. If you pass that through, you could make DRM_EXEC_INTERRUPTIBLE_WAIT conditional on that.
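
For example (sketch only; this reuses the ctx->intr flag that the removed ttm_eu_reserve_buffers() call was consuming):

        drm_exec_init(&ctx->exec, ctx->intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0);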

Regards,
  Felix


+       drm_exec_until_all_locked(&ctx->exec) {
+               for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+                       pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+                       if (!pdd) {
+                               pr_debug("failed to find device idx %d\n", gpuidx);
+                               r = -EINVAL;
+                               goto unreserve_out;
+                       }
+                       vm = drm_priv_to_vm(pdd->drm_priv);
-       r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
-                                  ctx->intr, NULL);
-       if (r) {
-               pr_debug("failed %d to reserve bo\n", r);
-               return r;
+                       r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+                       drm_exec_retry_on_contention(&ctx->exec);
+                       if (unlikely(r)) {
+                               pr_debug("failed %d to reserve bo\n", r);
+                               goto unreserve_out;
+                       }
+               }
        }
        for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
@@ -1508,13 +1507,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
        return 0;
  unreserve_out:
-       ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+       drm_exec_fini(&ctx->exec);
        return r;
  }
  static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
  {
-       ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+       drm_exec_fini(&ctx->exec);
  }
  static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
