On 05-06-2026 00:16, Raag Jadav wrote:
System controller allows getting/setting per counter threshold for
correctable errors, which it uses to raise error events to the driver.
Get/set it using the respective mailbox command.
Signed-off-by: Raag Jadav <[email protected]>
---
v2: Add RAS operation status codes (Riana)
v3: Reuse status codes and uapi mapping from counter series (Riana)
Access request/response counter using local pointer (Riana)
Mark unused field as reserved (Riana)
---
drivers/gpu/drm/xe/xe_ras.c | 105 ++++++++++++++++++
drivers/gpu/drm/xe/xe_ras.h | 2 +
drivers/gpu/drm/xe/xe_ras_types.h | 51 +++++++++
drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h | 4 +
4 files changed, 162 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 7cb6fcb1254a..d6f89b429cec 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -270,6 +270,111 @@ int xe_ras_clear_counter(struct xe_device *xe, u8
severity, u8 component)
return 0;
}
+/**
+ * xe_ras_get_threshold() - Get error counter threshold
+ * @xe: Xe device instance
+ * @severity: Error severity to be queried (&enum drm_xe_ras_error_severity)
+ * @component: Error component to be queried (&enum drm_xe_ras_error_component)
+ * @threshold: Counter threshold
+ *
+ * This function retrieves the error threshold of a specific counter based on
+ * severity and component.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_ras_get_threshold(struct xe_device *xe, u8 severity, u8 component, u32
*threshold)
+{
+ struct xe_ras_get_threshold_response response = {};
+ struct xe_ras_get_threshold_request request = {};
+ struct xe_sysctrl_mailbox_command command = {};
+ struct xe_ras_error_class *counter;
+ size_t len;
+ int ret;
+
+ counter = &request.counter;
+ counter->common.severity = drm_to_xe_ras_severity(severity);
+ counter->common.component = drm_to_xe_ras_component(component);
+
+ xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP,
XE_SYSCTRL_CMD_GET_THRESHOLD,
+ &request, sizeof(request), &response,
sizeof(response));
+
+ guard(xe_pm_runtime)(xe);
+ ret = xe_sysctrl_send_command(&xe->sc, &command, &len);
+ if (ret) {
+ xe_err(xe, "sysctrl: failed to get threshold %d\n", ret);
+ return ret;
+ }
+
+ if (len != sizeof(response)) {
+ xe_err(xe, "sysctrl: unexpected get threshold response length %zu
(expected %zu)\n",
+ len, sizeof(response));
+ return -EIO;
+ }
+
+ counter = &response.counter;
+ *threshold = response.threshold;
+
+ xe_dbg(xe, "[RAS]: get counter threshold %u for %s %s\n", *threshold,
+ comp_to_str(counter->common.component),
sev_to_str(counter->common.severity));
Use "get threshold" here instead of "get counter threshold", to be
consistent with the <operation> <value> <component> <severity> format
and with the other prints.
+ return 0;
+}
+
+/**
+ * xe_ras_set_threshold() - Set error counter threshold
+ * @xe: Xe device instance
+ * @severity: Error severity to be set (&enum drm_xe_ras_error_severity)
+ * @component: Error component to be set (&enum drm_xe_ras_error_component)
+ * @threshold: Counter threshold
+ *
+ * This function sets the error threshold of a specific counter based on
+ * severity and component.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_ras_set_threshold(struct xe_device *xe, u8 severity, u8 component, u32
threshold)
+{
+ struct xe_ras_set_threshold_response response = {};
+ struct xe_ras_set_threshold_request request = {};
+ struct xe_sysctrl_mailbox_command command = {};
+ struct xe_ras_error_class *counter;
+ size_t len;
+ int ret;
+
+ counter = &request.counter;
+ counter->common.severity = drm_to_xe_ras_severity(severity);
+ counter->common.component = drm_to_xe_ras_component(component);
+ request.threshold = threshold;
+
+ xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP,
XE_SYSCTRL_CMD_SET_THRESHOLD,
+ &request, sizeof(request), &response,
sizeof(response));
+
+ guard(xe_pm_runtime)(xe);
+ ret = xe_sysctrl_send_command(&xe->sc, &command, &len);
+ if (ret) {
+ xe_err(xe, "sysctrl: failed to set threshold %d\n", ret);
+ return ret;
+ }
+
+ if (len != sizeof(response)) {
+ xe_err(xe, "sysctrl: unexpected set threshold response length %zu
(expected %zu)\n",
+ len, sizeof(response));
+ return -EIO;
+ }
+
+ ret = ras_status_to_errno(response.status);
+ if (ret) {
+ xe_err(xe, "sysctrl: set threshold command failed with status
%#x\n",
+ response.status);
+ return ret;
+ }
+
+ counter = &response.counter;
+
+ xe_dbg(xe, "[RAS]: set counter threshold %u for %s %s\n",
response.threshold,
Use "set threshold" here instead of "set counter threshold".
+ comp_to_str(counter->common.component),
sev_to_str(counter->common.severity));
+ return 0;
+}
+
/**
* xe_ras_init - Initialize Xe RAS
* @xe: xe device instance
diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h
index ba0b0224df23..1aa43c54b710 100644
--- a/drivers/gpu/drm/xe/xe_ras.h
+++ b/drivers/gpu/drm/xe/xe_ras.h
@@ -15,6 +15,8 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe,
struct xe_sysctrl_event_response
*response);
int xe_ras_get_counter(struct xe_device *xe, u8 severity, u8 component, u32
*value);
int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component);
+int xe_ras_get_threshold(struct xe_device *xe, u8 severity, u8 component, u32
*threshold);
+int xe_ras_set_threshold(struct xe_device *xe, u8 severity, u8 component, u32
threshold);
void xe_ras_init(struct xe_device *xe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_ras_types.h
b/drivers/gpu/drm/xe/xe_ras_types.h
index c6392435d1c6..8ea817583eed 100644
--- a/drivers/gpu/drm/xe/xe_ras_types.h
+++ b/drivers/gpu/drm/xe/xe_ras_types.h
@@ -121,4 +121,55 @@ struct xe_ras_clear_counter_response {
/** @reserved1: Reserved for future use */
u32 reserved1[3];
} __packed;
+
+/**
+ * struct xe_ras_get_threshold_request - Request structure for get threshold
+ */
+struct xe_ras_get_threshold_request {
+ /** @counter: Counter to get threshold for */
+ struct xe_ras_error_class counter;
+ /** @reserved: Reserved for future use */
+ u32 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_get_threshold_response - Response structure for get threshold
+ */
+struct xe_ras_get_threshold_response {
+ /** @counter: Counter ID */
+ struct xe_ras_error_class counter;
+ /** @threshold: Threshold value */
+ u32 threshold;
+ /** @reserved: Reserved for future use */
+ u32 reserved[4];
+} __packed;
+
+/**
+ * struct xe_ras_set_threshold_request - Request structure for set threshold
+ */
+struct xe_ras_set_threshold_request {
+ /** @counter: Counter to set threshold for */
+ struct xe_ras_error_class counter;
+ /** @threshold: Threshold value to set */
+ u32 threshold;
+ /** @reserved: Reserved for future use */
+ u32 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_set_threshold_response - Response structure for set threshold
+ */
+struct xe_ras_set_threshold_response {
+ /** @counter: Counter ID */
+ struct xe_ras_error_class counter;
+ /** @reserved: Reserved */
+ u32 reserved;
+ /** @threshold: Updated threshold value */
+ u32 threshold;
+ /** @status: Set threshold operation status */
Nit: This is already part of the set threshold response, so the comment can be just "Operation status".
Thanks
Riana
+ u32 status;
+ /** @reserved1: Reserved for future use */
+ u32 reserved1[2];
+} __packed;
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
index 6e3753554510..10f06aa5c4b5 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
@@ -24,11 +24,15 @@ enum xe_sysctrl_group {
*
* @XE_SYSCTRL_CMD_GET_COUNTER: Get error counter value
* @XE_SYSCTRL_CMD_CLEAR_COUNTER: Clear error counter value
+ * @XE_SYSCTRL_CMD_GET_THRESHOLD: Retrieve error threshold
+ * @XE_SYSCTRL_CMD_SET_THRESHOLD: Set error threshold
* @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event
*/
enum xe_sysctrl_gfsp_cmd {
XE_SYSCTRL_CMD_GET_COUNTER = 0x03,
XE_SYSCTRL_CMD_CLEAR_COUNTER = 0x04,
+ XE_SYSCTRL_CMD_GET_THRESHOLD = 0x05,
+ XE_SYSCTRL_CMD_SET_THRESHOLD = 0x06,
XE_SYSCTRL_CMD_GET_PENDING_EVENT = 0x07,
};