Add gfx ras poison consumption irq handling on gfx v11_0_3.

V2:
  Move ras poison consumption irq handling code of gfx
  v11_0_3 to gfx_v11_0_3.c.

V5:
  Create dedicated irq handler for RLC_GC_FED_INTERRUPT.

V6:
  Remove invalid function call.

Signed-off-by: YiPeng Chai <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
Reviewed-by: Tao Zhou <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |  4 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c        | 24 +++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c      | 50 ++++++++++++++++++-
 .../include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h  |  2 +
 4 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6b26597217ed..0b39fe3cd624 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -210,6 +210,9 @@ struct amdgpu_gfx_ras {
        struct amdgpu_ras_block_object  ras_block;
        void (*enable_watchdog_timer)(struct amdgpu_device *adev);
        bool (*query_utcl2_poison_status)(struct amdgpu_device *adev);
+       int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
+                               struct amdgpu_irq_src *source,
+                               struct amdgpu_iv_entry *entry);
 };
 
 struct amdgpu_gfx_funcs {
@@ -323,6 +326,7 @@ struct amdgpu_gfx {
        struct amdgpu_irq_src           priv_inst_irq;
        struct amdgpu_irq_src           cp_ecc_error_irq;
        struct amdgpu_irq_src           sq_irq;
+       struct amdgpu_irq_src           rlc_gc_fed_irq;
        struct sq_work                  sq_work;
 
        /* gfx status */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 82beb46788cf..cc634cae77d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1338,6 +1338,13 @@ static int gfx_v11_0_sw_init(void *handle)
        if (r)
                return r;
 
+       /* FED error */
+       r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+                                 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
+                                 &adev->gfx.rlc_gc_fed_irq);
+       if (r)
+               return r;
+
        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
 
        if (adev->gfx.imu.funcs) {
@@ -6034,6 +6041,16 @@ static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
        return 0;
 }
 
+static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
+                                 struct amdgpu_irq_src *source,
+                                 struct amdgpu_iv_entry *entry)
+{
+       if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
+               return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
+
+       return 0;
+}
+
 #if 0
 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *src,
@@ -6264,6 +6281,10 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
        .process = gfx_v11_0_priv_inst_irq,
 };
 
+static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
+       .process = gfx_v11_0_rlc_gc_fed_irq,
+};
+
 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
 {
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
@@ -6274,6 +6295,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
 
        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
+
+       adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
+       adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
 }
 
 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
index 5966d984a30a..a18e09de31dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
@@ -22,6 +22,54 @@
  */
 
 #include "amdgpu.h"
+#include "soc21.h"
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "gfx_v11_0.h"
 
 
-struct amdgpu_gfx_ras gfx_v11_0_3_ras;
+static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
+                                 struct amdgpu_irq_src *source,
+                                 struct amdgpu_iv_entry *entry)
+{
+       uint32_t rlc_status0 = 0, rlc_status1 = 0;
+       struct ras_common_if *ras_if = NULL;
+       struct ras_dispatch_if ih_data = {
+               .entry = entry,
+       };
+
+       rlc_status0 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_0));
+       rlc_status1 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_1));
+
+       if (!rlc_status0 && !rlc_status1) {
+               dev_warn(adev->dev, "RLC_GC_FED irq is generated, but rlc_status0 and rlc_status1 are empty!\n");
+               return 0;
+       }
+
+       /* Use RLC_RLCS_FED_STATUS_0/1 to distinguish FED error block. */
+       if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+           REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR))
+               ras_if = adev->sdma.ras_if;
+       else
+               ras_if = adev->gfx.ras_if;
+
+       if (!ras_if) {
+               dev_err(adev->dev, "Gfx or sdma ras block not initialized, rlc_status0:0x%x.\n",
+                               rlc_status0);
+               return -EINVAL;
+       }
+
+       ih_data.head = *ras_if;
+
+       dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name);
+       amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+
+       return 0;
+}
+
+struct amdgpu_gfx_ras gfx_v11_0_3_ras = {
+       .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq,
+};
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h
index 9e8ed9f4bb15..3a4670bc4449 100644
--- a/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h
+++ b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h
@@ -49,6 +49,8 @@
 #define GFX_11_0_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT          65      // 0x41 GPF(Sem incomplete timeout)
 #define GFX_11_0_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT           66      // 0x42 Semaphore wait fail timeout
 
+#define GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT                 128     // 0x80 FED Interrupt (for data poisoning)
+
 #define GFX_11_0_0__SRCID__CP_GENERIC_INT                                      177             // 0xB1 CP_GENERIC int
 #define GFX_11_0_0__SRCID__CP_PM4_PKT_RSVD_BIT_ERROR               180         // 0xB4 PM4 Pkt Rsvd Bits Error
 #define GFX_11_0_0__SRCID__CP_EOP_INTERRUPT                                        181         // 0xB5 End-of-Pipe Interrupt
-- 
2.25.1

Reply via email to