On 7/2/26 23:22, Ulisses Paixao wrote:
> The functions gfx_v11_0_handle_priv_fault and
> gfx_v12_0_handle_priv_fault share the same logic for searching and
> triggering a scheduler fault on a ring. This patch moves the shared
> ring-searching logic to a common function, amdgpu_gfx_handle_priv_fault,
> in amdgpu_gfx.c. The hardware-specific decoding of ring IDs remains in
> the version-specific files to maintain proper architectural separation.
> 
> Signed-off-by: Ulisses Paixao <[email protected]>
> Co-developed-by: Felipe Sousa <[email protected]>
> Signed-off-by: Felipe Sousa <[email protected]>
> Reviewed-by: Christian König <[email protected]>
> ---
> v5:
> Return early on adev->gfx.disable_kq check.
> 
> v4:
> Restore the adev->gfx.disable_kq check to prevent falsely triggering
> scheduler faults on idle kernel rings when MES is managing user queues.
> 
> v3:
> Return early if the ring is found in the gfx rings loop.
> 
> v2:
> Keep the HW-specific decoding in gfx_v11_0.c and gfx_v12_0.c.
> Remove the redundant check for adev->gfx.disable_kq.
> Simplify the search loop in amdgpu_gfx_handle_priv_fault to iterate over
> all gfx and compute rings without a switch statement.
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 40 +++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 27 +----------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 27 +----------------
>  4 files changed, 44 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index b8ca87669..47e06a585 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -830,6 +830,46 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, 
> int xcc_id)
>       return r;
>  }
>  
> +/**
> + * amdgpu_gfx_handle_priv_fault - Handle privileged instruction fault
> + *
> + * @adev: amdgpu_device pointer
> + * @me_id: micro-engine ID of the faulty ring
> + * @pipe_id: pipe ID of the faulty ring
> + * @queue_id: queue ID of the faulty ring
> + *
> + * This function handles privileged instruction faults by identifying
> + * the faulty ring (gfx or compute) and triggering a scheduler fault
> + */
> +void amdgpu_gfx_handle_priv_fault(struct amdgpu_device *adev,
> +                                     u8 me_id, u8 pipe_id, u8 queue_id)
> +{
> +     struct amdgpu_ring *ring;
> +     int i;
> +
> +     /* The scheduler only handles kernel queues so this is a no-op 
> +     without them. */
> +     if (adev->gfx.disable_kq) {
> +             return;
> +     }

Just style nitpicks: the comment style should be like this:

/*
 * Text.....
 */

And drop the extra {} around the return; checkpatch.pl would complain about
that.

Christian.


> +
> +     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> +             ring = &adev->gfx.gfx_ring[i];
> +             if (ring->me == me_id && ring->pipe == pipe_id &&
> +                 ring->queue == queue_id) {
> +                     drm_sched_fault(&ring->sched);
> +                     return;
> +             }
> +     }
> +     
> +     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> +             ring = &adev->gfx.compute_ring[i];
> +             if (ring->me == me_id && ring->pipe == pipe_id &&
> +                 ring->queue == queue_id)
> +                     drm_sched_fault(&ring->sched);
> +     }
> +}
> +
>  static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
>                                  bool no_delay)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index a0cf0a3b4..0b2f6ce85 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -611,6 +611,8 @@ bool amdgpu_gfx_is_high_priority_graphics_queue(struct 
> amdgpu_device *adev,
>                                               struct amdgpu_ring *ring);
>  bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
>                                   int pipe, int queue);
> +void amdgpu_gfx_handle_priv_fault(struct amdgpu_device *adev,
> +                                     u8 me_id, u8 pipe_id, u8 queue_id);
>  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
>  void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 2c6f1e25c..888c9f3c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -6688,37 +6688,12 @@ static void gfx_v11_0_handle_priv_fault(struct 
> amdgpu_device *adev,
>                                       struct amdgpu_iv_entry *entry)
>  {
>       u8 me_id, pipe_id, queue_id;
> -     struct amdgpu_ring *ring;
> -     int i;
>  
>       me_id = (entry->ring_id & 0x0c) >> 2;
>       pipe_id = (entry->ring_id & 0x03) >> 0;
>       queue_id = (entry->ring_id & 0x70) >> 4;
>  
> -     if (!adev->gfx.disable_kq) {
> -             switch (me_id) {
> -             case 0:
> -                     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> -                             ring = &adev->gfx.gfx_ring[i];
> -                             if (ring->me == me_id && ring->pipe == pipe_id 
> &&
> -                                 ring->queue == queue_id)
> -                                     drm_sched_fault(&ring->sched);
> -                     }
> -                     break;
> -             case 1:
> -             case 2:
> -                     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -                             ring = &adev->gfx.compute_ring[i];
> -                             if (ring->me == me_id && ring->pipe == pipe_id 
> &&
> -                                 ring->queue == queue_id)
> -                                     drm_sched_fault(&ring->sched);
> -                     }
> -                     break;
> -             default:
> -                     BUG();
> -                     break;
> -             }
> -     }
> +     amdgpu_gfx_handle_priv_fault(adev, me_id, pipe_id, queue_id);
>  }
>  
>  static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 6baac533a..3f0d29372 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -5019,37 +5019,12 @@ static void gfx_v12_0_handle_priv_fault(struct 
> amdgpu_device *adev,
>                                       struct amdgpu_iv_entry *entry)
>  {
>       u8 me_id, pipe_id, queue_id;
> -     struct amdgpu_ring *ring;
> -     int i;
>  
>       me_id = (entry->ring_id & 0x0c) >> 2;
>       pipe_id = (entry->ring_id & 0x03) >> 0;
>       queue_id = (entry->ring_id & 0x70) >> 4;
>  
> -     if (!adev->gfx.disable_kq) {
> -             switch (me_id) {
> -             case 0:
> -                     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> -                             ring = &adev->gfx.gfx_ring[i];
> -                             if (ring->me == me_id && ring->pipe == pipe_id 
> &&
> -                                 ring->queue == queue_id)
> -                                     drm_sched_fault(&ring->sched);
> -                     }
> -                     break;
> -             case 1:
> -             case 2:
> -                     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -                             ring = &adev->gfx.compute_ring[i];
> -                             if (ring->me == me_id && ring->pipe == pipe_id 
> &&
> -                                 ring->queue == queue_id)
> -                                     drm_sched_fault(&ring->sched);
> -                     }
> -                     break;
> -             default:
> -                     BUG();
> -                     break;
> -             }
> -     }
> +     amdgpu_gfx_handle_priv_fault(adev, me_id, pipe_id, queue_id);
>  }
>  
>  static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,

Reply via email to