[Public]

We might still need to export the number of rings correctly; otherwise, the
Mesa driver will conclude that the kernel driver exposes no available rings
and will then hit an assertion before submitting to the user queue.

If we want to keep the reported ring number at zero, the Mesa driver may need
a change like the attached patch, which allows commands to be submitted with a
ring number of zero when the user queue is enabled.

Hi @Olsak, Marek, do you think it's fine to have the attached patch for the
userq support? Besides these changes, we may also need to clean up the
IB-related part.

Regards,
      Prike

> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Alex
> Deucher
> Sent: Thursday, March 13, 2025 10:41 PM
> To: [email protected]
> Cc: Deucher, Alexander <[email protected]>; Khatri, Sunil
> <[email protected]>
> Subject: [PATCH 02/11] drm/amdgpu: add ring flag for no user submissions
>
> This would be set by IPs which only accept submissions from the kernel, not
> userspace, such as when kernel queues are disabled. Don't expose the rings to
> userspace and reject any submissions in the CS IOCTL.
>
> Reviewed-by: Sunil Khatri<[email protected]>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 30 ++++++++++++++++--------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>  3 files changed, 25 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 5df21529b3b13..5cc18034b75df 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -349,6 +349,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser
> *p,
>       ring = amdgpu_job_ring(job);
>       ib = &job->ibs[job->num_ibs++];
>
> +     /* submissions to kernel queus are disabled */
> +     if (ring->no_user_submission)
> +             return -EINVAL;
> +
>       /* MM engine doesn't support user fences */
>       if (p->uf_bo && ring->funcs->no_user_fence)
>               return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index cd6eb7a3bc58a..3b7dfd56ccd0e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -408,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>       case AMDGPU_HW_IP_GFX:
>               type = AMD_IP_BLOCK_TYPE_GFX;
>               for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -                     if (adev->gfx.gfx_ring[i].sched.ready)
> +                     if (adev->gfx.gfx_ring[i].sched.ready &&
> +                         !adev->gfx.gfx_ring[i].no_user_submission)
>                               ++num_rings;
>               ib_start_alignment = 32;
>               ib_size_alignment = 32;
> @@ -416,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>       case AMDGPU_HW_IP_COMPUTE:
>               type = AMD_IP_BLOCK_TYPE_GFX;
>               for (i = 0; i < adev->gfx.num_compute_rings; i++)
> -                     if (adev->gfx.compute_ring[i].sched.ready)
> +                     if (adev->gfx.compute_ring[i].sched.ready &&
> +                         !adev->gfx.compute_ring[i].no_user_submission)
>                               ++num_rings;
>               ib_start_alignment = 32;
>               ib_size_alignment = 32;
> @@ -424,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>       case AMDGPU_HW_IP_DMA:
>               type = AMD_IP_BLOCK_TYPE_SDMA;
>               for (i = 0; i < adev->sdma.num_instances; i++)
> -                     if (adev->sdma.instance[i].ring.sched.ready)
> +                     if (adev->sdma.instance[i].ring.sched.ready &&
> +                         !adev->gfx.gfx_ring[i].no_user_submission)
>                               ++num_rings;
>               ib_start_alignment = 256;
>               ib_size_alignment = 4;
> @@ -435,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>                       if (adev->uvd.harvest_config & (1 << i))
>                               continue;
>
> -                     if (adev->uvd.inst[i].ring.sched.ready)
> +                     if (adev->uvd.inst[i].ring.sched.ready &&
> +                         !adev->uvd.inst[i].ring.no_user_submission)
>                               ++num_rings;
>               }
>               ib_start_alignment = 256;
> @@ -444,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>       case AMDGPU_HW_IP_VCE:
>               type = AMD_IP_BLOCK_TYPE_VCE;
>               for (i = 0; i < adev->vce.num_rings; i++)
> -                     if (adev->vce.ring[i].sched.ready)
> +                     if (adev->vce.ring[i].sched.ready &&
> +                         !adev->vce.ring[i].no_user_submission)
>                               ++num_rings;
>               ib_start_alignment = 256;
>               ib_size_alignment = 4;
> @@ -456,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>                               continue;
>
>                       for (j = 0; j < adev->uvd.num_enc_rings; j++)
> -                             if (adev->uvd.inst[i].ring_enc[j].sched.ready)
> +                             if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
> +                                 
> !adev->uvd.inst[i].ring_enc[j].no_user_submission)
>                                       ++num_rings;
>               }
>               ib_start_alignment = 256;
> @@ -468,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>                       if (adev->vcn.harvest_config & (1 << i))
>                               continue;
>
> -                     if (adev->vcn.inst[i].ring_dec.sched.ready)
> +                     if (adev->vcn.inst[i].ring_dec.sched.ready &&
> +                         !adev->vcn.inst[i].ring_dec.no_user_submission)
>                               ++num_rings;
>               }
>               ib_start_alignment = 256;
> @@ -481,7 +488,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>                               continue;
>
>                       for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
> -                             if (adev->vcn.inst[i].ring_enc[j].sched.ready)
> +                             if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
> +                                 
> !adev->vcn.inst[i].ring_enc[j].no_user_submission)
>                                       ++num_rings;
>               }
>               ib_start_alignment = 256;
> @@ -496,7 +504,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>                               continue;
>
>                       for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
> -                             if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
> +                             if (adev->jpeg.inst[i].ring_dec[j].sched.ready 
> &&
> +                                 
> !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
>                                       ++num_rings;
>               }
>               ib_start_alignment = 256;
> @@ -504,7 +513,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
>               break;
>       case AMDGPU_HW_IP_VPE:
>               type = AMD_IP_BLOCK_TYPE_VPE;
> -             if (adev->vpe.ring.sched.ready)
> +             if (adev->vpe.ring.sched.ready &&
> +                 !adev->vpe.ring.no_user_submission)
>                       ++num_rings;
>               ib_start_alignment = 256;
>               ib_size_alignment = 4;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index b4fd1e17205e9..4a97afcb38b78 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -297,6 +297,7 @@ struct amdgpu_ring {
>       struct dma_fence        *vmid_wait;
>       bool                    has_compute_vm_bug;
>       bool                    no_scheduler;
> +     bool                    no_user_submission;
>       int                     hw_prio;
>       unsigned                num_hw_submission;
>       atomic_t                *sched_score;
> @@ -310,7 +311,6 @@ struct amdgpu_ring {
>       unsigned int    entry_index;
>       /* store the cached rptr to restore after reset */
>       uint64_t cached_rptr;
> -
>  };
>
>  #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), 
> (job), (ib)))
> --
> 2.48.1

Attachment: 0001-winsys-amdgpu-allow-to-submit-userq-at-invalidate-ri.patch
Description: 0001-winsys-amdgpu-allow-to-submit-userq-at-invalidate-ri.patch

Reply via email to