On 25.09.24 at 21:59, Arunpravin Paneer Selvam wrote:
[SNIP]
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+                           struct drm_file *filp)
+{
+       struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+       struct drm_amdgpu_userq_wait *wait_info = data;
+       u32 *syncobj_handles, *bo_handles;
+       struct dma_fence **fences = NULL;
+       u32 num_syncobj, num_bo_handles;
+       struct drm_gem_object **gobj;
+       struct drm_exec exec;
+       int r, i, entry, cnt;
+       u64 num_fences = 0;
+
+       num_bo_handles = wait_info->num_bo_handles;
+       bo_handles = memdup_user(u64_to_user_ptr(wait_info->bo_handles_array),
+                                sizeof(u32) * num_bo_handles);
+       if (IS_ERR(bo_handles))
+               return PTR_ERR(bo_handles);
+
+       num_syncobj = wait_info->num_syncobj_handles;
+       syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles_array),
+                                     sizeof(u32) * num_syncobj);
+       if (IS_ERR(syncobj_handles)) {
+               r = PTR_ERR(syncobj_handles);
+               goto free_bo_handles;
+       }
+
+       /* Array of GEM object handles */
+       gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
+       if (!gobj) {
+               r = -ENOMEM;
+               goto free_syncobj_handles;
+       }
+
+       for (entry = 0; entry < num_bo_handles; entry++) {
+               gobj[entry] = drm_gem_object_lookup(filp, bo_handles[entry]);
+               if (!gobj[entry]) {
+                       r = -ENOENT;
+                       goto put_gobj;
+               }
+       }
+
+       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+       drm_exec_until_all_locked(&exec) {
+               r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0);
+               drm_exec_retry_on_contention(&exec);
+               if (r) {
+                       drm_exec_fini(&exec);
+                       goto put_gobj;
+               }
+       }
+
+       if (!wait_info->num_fences) {
+               /* Count the syncobj fences */
+               for (i = 0; i < num_syncobj; i++) {
+                       struct dma_fence *fence;
+
+                       r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+                                                  0, 0, &fence);
+                       if (r || !fence)
+                               continue;
+
+                       dma_fence_put(fence);
+                       num_fences++;
+               }
+
+               /* Count the GEM object fences */
+               for (i = 0; i < num_bo_handles; i++) {
+                       struct dma_resv_iter resv_cursor;
+                       struct dma_fence *fence;
+
+                       dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv,
+                                               dma_resv_usage_rw(wait_info->bo_wait_flags &
+                                               AMDGPU_USERQ_BO_WRITE), fence)
+                               num_fences++;

We should probably adjust the UAPI here once more.

The problem is that we only provide the AMDGPU_USERQ_BO_WRITE flag once for the whole IOCTL instead of per BO.

So the best approach would probably be to drop the AMDGPU_USERQ_BO_WRITE flag and split the array of BOs into separate reader and writer arrays; see the sketch below.
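
Just to illustrate the direction (a rough sketch only; the field names
are placeholders and not a final UAPI proposal):

    struct drm_amdgpu_userq_wait {
            ...
            /* BOs we only read from: wait on writers only,
             * i.e. dma_resv_usage_rw(false) == DMA_RESV_USAGE_WRITE.
             */
            __u64 bo_read_handles_array;
            __u32 num_bo_read_handles;

            /* BOs we write to: wait on readers and writers,
             * i.e. dma_resv_usage_rw(true) == DMA_RESV_USAGE_READ.
             */
            __u64 bo_write_handles_array;
            __u32 num_bo_write_handles;
            ...
    };

The wait path then just iterates both arrays with the fixed usage for
each, instead of deriving one usage from a single flag for all BOs.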

Can you work on that, Arun? Shouldn't be more than a bit of a typing exercise.

Thanks,
Christian.
