Add error-event support for correctable errors in CRI. When an interrupt for correctable errors is received, an error-event is reported to userspace for every error that has crossed the threshold.
Signed-off-by: Riana Tauro <[email protected]> --- drivers/gpu/drm/xe/xe_ras.c | 53 +++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c index 44f4e1a3455b..acf3207aa2fd 100644 --- a/drivers/gpu/drm/xe/xe_ras.c +++ b/drivers/gpu/drm/xe/xe_ras.c @@ -77,6 +77,18 @@ static u8 drm_to_xe_ras_severity(u8 severity) } } +static u8 xe_to_drm_ras_severity(u8 severity) +{ + switch (severity) { + case XE_RAS_SEV_CORRECTABLE: + return DRM_XE_RAS_ERR_SEV_CORRECTABLE; + case XE_RAS_SEV_UNCORRECTABLE: + return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE; + default: + return DRM_XE_RAS_ERR_SEV_MAX; + } +} + static u8 drm_to_xe_ras_component(u8 component) { switch (component) { @@ -95,6 +107,24 @@ static u8 drm_to_xe_ras_component(u8 component) } } +static u8 xe_to_drm_ras_component(u8 component) +{ + switch (component) { + case XE_RAS_COMP_DEVICE_MEMORY: + return DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY; + case XE_RAS_COMP_CORE_COMPUTE: + return DRM_XE_RAS_ERR_COMP_CORE_COMPUTE; + case XE_RAS_COMP_PCIE: + return DRM_XE_RAS_ERR_COMP_PCIE; + case XE_RAS_COMP_FABRIC: + return DRM_XE_RAS_ERR_COMP_FABRIC; + case XE_RAS_COMP_SOC_INTERNAL: + return DRM_XE_RAS_ERR_COMP_SOC_INTERNAL; + default: + return DRM_XE_RAS_ERR_COMP_MAX; + } +} + static int ras_status_to_errno(u32 status) { switch (status) { @@ -131,6 +161,27 @@ static inline const char *comp_to_str(u8 component) return xe_ras_components[component]; } +static void ras_send_error_event(struct xe_device *xe, u8 severity, u8 component) +{ + u8 drm_severity, drm_component; + u32 value; + int ret; + + drm_severity = xe_to_drm_ras_severity(severity); + if (drm_severity == DRM_XE_RAS_ERR_SEV_MAX) + return; + + drm_component = xe_to_drm_ras_component(component); + if (drm_component == DRM_XE_RAS_ERR_COMP_MAX) + return; + + ret = xe_ras_get_counter(xe, severity, component, &value); + if (ret) + return; + + xe_drm_ras_event(xe, drm_component, drm_severity, value, 
GFP_KERNEL); +} + void xe_ras_counter_threshold_crossed(struct xe_device *xe, struct xe_sysctrl_event_response *response) { @@ -152,6 +203,8 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe, severity = errors[id].common.severity; component = errors[id].common.component; + ras_send_error_event(xe, severity, component); + xe_warn(xe, "[RAS]: %s %s detected\n", comp_to_str(component), sev_to_str(severity)); } -- 2.47.1
