On 7/2/26 23:22, Ulisses Paixao wrote:
> The functions gfx_v11_0_handle_priv_fault and
> gfx_v12_0_handle_priv_fault share the same logic for searching and
> triggering a scheduler fault on a ring. This patch moves the shared
> ring-searching logic to a common function, amdgpu_gfx_handle_priv_fault,
> in amdgpu_gfx.c. The hardware-specific decoding of ring IDs remains in
> the version-specific files to maintain proper architectural separation.
>
> Signed-off-by: Ulisses Paixao <[email protected]>
> Co-developed-by: Felipe Sousa <[email protected]>
> Signed-off-by: Felipe Sousa <[email protected]>
> Reviewed-by: Christian König <[email protected]>
> ---
> v5:
> Return early on adv->gfx.disable_kq check.
>
> v4:
> Restore the adev->gfx.disable_kq check to prevent falsely triggering
> scheduler faults on idle kernel rings when MES is managing user queues.
>
> v3:
> Return early if the ring is found in the gfx rings loop.
>
> v2:
> Keep the HW-specific decoding in gfx_v11_0.c and gfx_v12_0.c.
> Remove the redundant check for adev->gfx.disable_kq.
> Simplify the search loop in amdgpu_gfx_handle_priv_fault to iterate over
> all gfx and compute rings without a switch statement.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 40 +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 27 +----------------
> drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 +----------------
> 4 files changed, 44 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index b8ca87669..47e06a585 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -830,6 +830,46 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev,
> int xcc_id)
> return r;
> }
>
> +/**
> + * amdgpu_gfx_handle_priv_fault - Handle privileged instruction fault
> + *
> + * @adev: amdgpu_device pointer
> + * @me_id: micro-engine ID of the faulty ring
> + * @pipe_id: pipe ID of the faulty ring
> + * @queue_id: queue ID of the faulty ring
> + *
> + * This function handles privileged instruction faults by identifying
> + * the faulty ring (gfx or compute) and triggering a scheduler fault
> + */
> +void amdgpu_gfx_handle_priv_fault(struct amdgpu_device *adev,
> + u8 me_id, u8 pipe_id, u8 queue_id)
> +{
> + struct amdgpu_ring *ring;
> + int i;
> +
> + /* The scheduler only handles kernel queues so this is a no-op
> + without them. */
> + if (adev->gfx.disable_kq) {
> + return;
> + }
Just style nit picks, the comment style should be like this:
/*
* Text.....
*/
And drop the extra {} around the return, checkpatch.pl would complain about
that.
Christian.
> +
> + for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> + ring = &adev->gfx.gfx_ring[i];
> + if (ring->me == me_id && ring->pipe == pipe_id &&
> + ring->queue == queue_id) {
> + drm_sched_fault(&ring->sched);
> + return;
> + }
> + }
> +
> + for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> + ring = &adev->gfx.compute_ring[i];
> + if (ring->me == me_id && ring->pipe == pipe_id &&
> + ring->queue == queue_id)
> + drm_sched_fault(&ring->sched);
> + }
> +}
> +
> static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
> bool no_delay)
> {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index a0cf0a3b4..0b2f6ce85 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -611,6 +611,8 @@ bool amdgpu_gfx_is_high_priority_graphics_queue(struct
> amdgpu_device *adev,
> struct amdgpu_ring *ring);
> bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
> int pipe, int queue);
> +void amdgpu_gfx_handle_priv_fault(struct amdgpu_device *adev,
> + u8 me_id, u8 pipe_id, u8 queue_id);
> void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
> void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 2c6f1e25c..888c9f3c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -6688,37 +6688,12 @@ static void gfx_v11_0_handle_priv_fault(struct
> amdgpu_device *adev,
> struct amdgpu_iv_entry *entry)
> {
> u8 me_id, pipe_id, queue_id;
> - struct amdgpu_ring *ring;
> - int i;
>
> me_id = (entry->ring_id & 0x0c) >> 2;
> pipe_id = (entry->ring_id & 0x03) >> 0;
> queue_id = (entry->ring_id & 0x70) >> 4;
>
> - if (!adev->gfx.disable_kq) {
> - switch (me_id) {
> - case 0:
> - for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> - ring = &adev->gfx.gfx_ring[i];
> - if (ring->me == me_id && ring->pipe == pipe_id
> &&
> - ring->queue == queue_id)
> - drm_sched_fault(&ring->sched);
> - }
> - break;
> - case 1:
> - case 2:
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - ring = &adev->gfx.compute_ring[i];
> - if (ring->me == me_id && ring->pipe == pipe_id
> &&
> - ring->queue == queue_id)
> - drm_sched_fault(&ring->sched);
> - }
> - break;
> - default:
> - BUG();
> - break;
> - }
> - }
> + amdgpu_gfx_handle_priv_fault(adev, me_id, pipe_id, queue_id);
> }
>
> static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 6baac533a..3f0d29372 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -5019,37 +5019,12 @@ static void gfx_v12_0_handle_priv_fault(struct
> amdgpu_device *adev,
> struct amdgpu_iv_entry *entry)
> {
> u8 me_id, pipe_id, queue_id;
> - struct amdgpu_ring *ring;
> - int i;
>
> me_id = (entry->ring_id & 0x0c) >> 2;
> pipe_id = (entry->ring_id & 0x03) >> 0;
> queue_id = (entry->ring_id & 0x70) >> 4;
>
> - if (!adev->gfx.disable_kq) {
> - switch (me_id) {
> - case 0:
> - for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> - ring = &adev->gfx.gfx_ring[i];
> - if (ring->me == me_id && ring->pipe == pipe_id
> &&
> - ring->queue == queue_id)
> - drm_sched_fault(&ring->sched);
> - }
> - break;
> - case 1:
> - case 2:
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - ring = &adev->gfx.compute_ring[i];
> - if (ring->me == me_id && ring->pipe == pipe_id
> &&
> - ring->queue == queue_id)
> - drm_sched_fault(&ring->sched);
> - }
> - break;
> - default:
> - BUG();
> - break;
> - }
> - }
> + amdgpu_gfx_handle_priv_fault(adev, me_id, pipe_id, queue_id);
> }
>
> static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,