Add a watch_queue notification channel tied to struct xe_vm so that
userspace can subscribe to asynchronous GPU device events via the
general kernel notification mechanism.

Introduce DRM_IOCTL_XE_WATCH_QUEUE to let userspace subscribe a
notification pipe (opened with pipe2(O_NOTIFICATION_PIPE)) to the device
event stream.  Embed the watch_id field (0-255) in the WATCH_INFO_ID
field of every notification, allowing multiple watches to share a single
pipe and be told apart by the reader.

Deliver notifications as struct drm_xe_watch_notification records, with
type always set to WATCH_TYPE_DRM_XE_NOTIFY and subtype drawn from enum
drm_xe_watch_event.  Define DRM_XE_WATCH_EVENT_DEVICE_RESET as the
first event, to be posted by the GPU reset path to inform userspace that
in-flight work has been lost.  Expose xe_watch_queue_post_event() as the
in-kernel posting API.

Add event definitions in a separate uapi header, <drm/xe_drm_events.h>.
The main reason is that the header needs to include <linux/watch_queue.h>
which in turn includes <linux/fcntl.h> which may conflict with the
system <fcntl.h>. Hence user-space must pay special attention when including
this file.

Assisted-by: GitHub Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <[email protected]>
---
 drivers/gpu/drm/xe/Kconfig           |   1 +
 drivers/gpu/drm/xe/Makefile          |   1 +
 drivers/gpu/drm/xe/xe_device.c       |   7 ++
 drivers/gpu/drm/xe/xe_device_types.h |   6 ++
 drivers/gpu/drm/xe/xe_vm.c           |   7 +-
 drivers/gpu/drm/xe/xe_vm_types.h     |   2 +
 drivers/gpu/drm/xe/xe_watch_queue.c  | 107 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_watch_queue.h  |  20 +++++
 include/uapi/drm/xe_drm.h            |  46 ++++++++++++
 include/uapi/drm/xe_drm_events.h     |  56 ++++++++++++++
 10 files changed, 251 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_watch_queue.c
 create mode 100644 drivers/gpu/drm/xe/xe_watch_queue.h
 create mode 100644 include/uapi/drm/xe_drm_events.h

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 4d7dcaff2b91..dbdc2fb49c53 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -25,6 +25,7 @@ config DRM_XE
        select DRM_MIPI_DSI
        select RELAY
        select IRQ_WORK
+       select WATCH_QUEUE
        # xe depends on ACPI_VIDEO when ACPI is enabled
        # but for select to work, need to select ACPI_VIDEO's dependencies, ick
        select BACKLIGHT_CLASS_DEVICE if ACPI
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index ff778fb2d4ff..1129583865ad 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -144,6 +144,7 @@ xe-y += xe_bb.o \
        xe_vsec.o \
        xe_wa.o \
        xe_wait_user_fence.o \
+       xe_watch_queue.o \
        xe_wopcm.o
 
 xe-$(CONFIG_I2C)       += xe_i2c.o
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 3462645ca13c..89bc221546ce 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/fault-inject.h>
 #include <linux/units.h>
+#include <linux/watch_queue.h>
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_client.h>
@@ -75,6 +76,7 @@
 #include "xe_vsec.h"
 #include "xe_wait_user_fence.h"
 #include "xe_wa.h"
+#include "xe_watch_queue.h"
 
 #include <generated/xe_device_wa_oob.h>
 #include <generated/xe_wa_oob.h>
@@ -110,6 +112,8 @@ static int xe_file_open(struct drm_device *dev, struct 
drm_file *file)
        file->driver_priv = xef;
        kref_init(&xef->refcount);
 
+       init_watch_list(&xef->watch_list, NULL);
+
        task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
        if (task) {
                xef->process_name = kstrdup(task->comm, GFP_KERNEL);
@@ -124,6 +128,8 @@ static void xe_file_destroy(struct kref *ref)
 {
        struct xe_file *xef = container_of(ref, struct xe_file, refcount);
 
+       remove_watch_from_object(&xef->watch_list, NULL, 0, true);
+
        xa_destroy(&xef->exec_queue.xa);
        mutex_destroy(&xef->exec_queue.lock);
        xa_destroy(&xef->vm.xa);
@@ -211,6 +217,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, 
xe_exec_queue_set_property_ioctl,
                          DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF_DRV(XE_WATCH_QUEUE, xe_watch_queue_ioctl, 
DRM_RENDER_ALLOW),
 };
 
 static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long 
arg)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h
index caa8f34a6744..a42e6125c069 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -11,6 +11,7 @@
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_device.h>
+#include <linux/watch_queue.h>
 
 #include "xe_devcoredump_types.h"
 #include "xe_heci_gsc.h"
@@ -629,6 +630,11 @@ struct xe_file {
 
        /** @refcount: ref count of this xe file */
        struct kref refcount;
+
+#ifdef CONFIG_WATCH_QUEUE
+       /** @watch_list: per-file notification source for device events */
+       struct watch_list watch_list;
+#endif
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 548b0769b3ef..1f331a2b2ecc 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -13,6 +13,7 @@
 #include <drm/drm_print.h>
 #include <drm/ttm/ttm_tt.h>
 #include <uapi/drm/xe_drm.h>
+#include <uapi/drm/xe_drm_events.h>
 #include <linux/ascii85.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
@@ -40,6 +41,7 @@
 #include "xe_tlb_inval.h"
 #include "xe_trace_bo.h"
 #include "xe_wa.h"
+#include "xe_watch_queue.h"
 
 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
 {
@@ -567,13 +569,13 @@ static void preempt_rebind_work_func(struct work_struct 
*w)
        }
 
        if (err) {
-               drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+               xe_watch_queue_post_vm_err_event(vm->xef, vm->id, err);
+               drm_dbg(&vm->xe->drm, "VM worker error: %d\n", err);
                xe_vm_kill(vm, true);
        }
        up_write(&vm->lock);
 
        free_preempt_fences(&preempt_fences);
-
        trace_xe_vm_rebind_worker_exit(vm);
 }
 
@@ -2008,6 +2010,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
        if (err)
                goto err_close_and_put;
 
+       vm->id = id;
        args->vm_id = id;
 
        return 0;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1f6f7e30e751..df559cf87b4c 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -365,6 +365,8 @@ struct xe_vm {
        bool batch_invalidate_tlb;
        /** @xef: Xe file handle for tracking this VM's drm client */
        struct xe_file *xef;
+       /** @id: The id of the VM in the VM table of @xef. */
+       u32 id;
 };
 
 /** struct xe_vma_op_map - VMA map operation */
diff --git a/drivers/gpu/drm/xe/xe_watch_queue.c 
b/drivers/gpu/drm/xe/xe_watch_queue.c
new file mode 100644
index 000000000000..14c93cdebefe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_watch_queue.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/slab.h>
+#include <linux/watch_queue.h>
+
+#include <uapi/drm/xe_drm.h>
+#include <uapi/drm/xe_drm_events.h>
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_macros.h"
+#include "xe_watch_queue.h"
+
+/**
+ * struct xe_watch_notification_vm_err - kernel-side VM error event 
notification
+ *
+ * Layout mirrors &struct drm_xe_watch_notification_vm_err.
+ *
+ * @base: common watch notification header; type is %WATCH_TYPE_DRM_XE_NOTIFY,
+ *        subtype is %DRM_XE_WATCH_EVENT_VM_ERR
+ * @vm_id: ID of the VM that hit error
+ * @error_code: error code describing the error condition (negative errno)
+ */
+struct xe_watch_notification_vm_err {
+       struct watch_notification base;
+       u32 vm_id;
+       s32 error_code;
+};
+
+/**
+ * xe_watch_queue_ioctl() - Subscribe a pipe to per-file device event 
notifications
+ * @dev: DRM device
+ * @data: pointer to &struct drm_xe_watch_queue from userspace
+ * @file: DRM file handle of the subscribing process
+ *
+ * Subscribes a notification pipe to receive Xe device events for the calling
+ * process's file handle.  Only events scoped to this file (e.g. VM error on a
+ * VM owned by this file) are delivered.  The pipe must have been opened with
+ * O_NOTIFICATION_PIPE and sized with %IOC_WATCH_QUEUE_SET_SIZE before calling
+ * this IOCTL.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xe_watch_queue_ioctl(struct drm_device *dev, void *data, struct drm_file 
*file)
+{
+       struct xe_file *xef = file->driver_priv;
+       struct xe_device *xe = to_xe_device(dev);
+       struct drm_xe_watch_queue *args = data;
+       struct watch_queue *wqueue;
+       struct watch *watch;
+       int ret;
+
+       if (XE_IOCTL_DBG(xe, args->flags || args->pad))
+               return -EINVAL;
+       if (XE_IOCTL_DBG(xe, args->watch_id > 0xff))
+               return -EINVAL;
+
+       wqueue = get_watch_queue(args->fd);
+       if (XE_IOCTL_DBG(xe, IS_ERR(wqueue)))
+               return PTR_ERR(wqueue);
+
+       watch = kzalloc(sizeof(*watch), GFP_KERNEL | __GFP_ACCOUNT);
+       if (XE_IOCTL_DBG(xe, !watch)) {
+               ret = -ENOMEM;
+               goto out_put_queue;
+       }
+
+       init_watch(watch, wqueue);
+       watch->id = 0;
+       watch->info_id = (u32)args->watch_id << WATCH_INFO_ID__SHIFT;
+
+       ret = add_watch_to_object(watch, &xef->watch_list);
+       if (XE_IOCTL_DBG(xe, ret))
+               kfree(watch);
+
+out_put_queue:
+       put_watch_queue(wqueue);
+       return ret;
+}
+
+/**
+ * xe_watch_queue_post_vm_err_event() - Post a VM error event
+ * @xef: xe file handle that owns the VM
+ * @vm_id: userspace ID of the VM that hit error
+ * @error_code: error code describing the error condition (negative errno)
+ *
+ * Posts a %DRM_XE_WATCH_EVENT_VM_ERR notification carrying @vm_id and
+ * @error_code to every pipe that @xef has subscribed via
+ * %DRM_IOCTL_XE_WATCH_QUEUE.  Only the owning process is notified,
+ * preventing information leaks to other clients.
+ */
+void xe_watch_queue_post_vm_err_event(struct xe_file *xef, u32 vm_id,
+                                     int error_code)
+{
+       struct xe_watch_notification_vm_err n = {};
+
+       n.base.type    = WATCH_TYPE_DRM_XE_NOTIFY;
+       n.base.subtype = DRM_XE_WATCH_EVENT_VM_ERR;
+       n.base.info    = watch_sizeof(struct xe_watch_notification_vm_err);
+       n.vm_id        = vm_id;
+       n.error_code   = error_code;
+
+       post_watch_notification(&xef->watch_list, &n.base, current_cred(), 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_watch_queue.h 
b/drivers/gpu/drm/xe/xe_watch_queue.h
new file mode 100644
index 000000000000..ad199ee68205
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_watch_queue.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_WATCH_QUEUE_H_
+#define _XE_WATCH_QUEUE_H_
+
+#include <linux/types.h>
+
+struct drm_device;
+struct drm_file;
+struct xe_file;
+
+int xe_watch_queue_ioctl(struct drm_device *dev, void *data,
+                        struct drm_file *file);
+void xe_watch_queue_post_vm_err_event(struct xe_file *xef, u32 vm_id,
+                                     int error_code);
+
+#endif /* _XE_WATCH_QUEUE_H_ */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index ef2565048bdf..bc3917700c82 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -83,6 +83,7 @@ extern "C" {
  *  - &DRM_IOCTL_XE_OBSERVATION
  *  - &DRM_IOCTL_XE_MADVISE
  *  - &DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS
+ *  - &DRM_IOCTL_XE_WATCH_QUEUE
  */
 
 /*
@@ -107,6 +108,7 @@ extern "C" {
 #define DRM_XE_MADVISE                 0x0c
 #define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS        0x0d
 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e
+#define DRM_XE_WATCH_QUEUE             0x0f
 
 /* Must be kept compact -- no holes */
 
@@ -125,6 +127,7 @@ extern "C" {
 #define DRM_IOCTL_XE_MADVISE                   DRM_IOW(DRM_COMMAND_BASE + 
DRM_XE_MADVISE, struct drm_xe_madvise)
 #define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr)
 #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY   DRM_IOW(DRM_COMMAND_BASE + 
DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
+#define DRM_IOCTL_XE_WATCH_QUEUE               DRM_IOW(DRM_COMMAND_BASE + 
DRM_XE_WATCH_QUEUE, struct drm_xe_watch_queue)
 
 /**
  * DOC: Xe IOCTL Extensions
@@ -2357,6 +2360,49 @@ struct drm_xe_exec_queue_set_property {
        __u64 reserved[2];
 };
 
+/**
+ * DOC: DRM_XE_WATCH_QUEUE
+ *
+ * Subscribe a notification pipe to receive device events for the calling
+ * process's DRM file handle.  Events are scoped to the subscribing file:
+ * only events that belong to that file (for example, VM error on a VM created
+ * through the same file) are delivered, preventing information leaks between
+ * processes sharing the same GPU device.
+ *
+ * The pipe must first be opened with O_NOTIFICATION_PIPE (i.e. O_EXCL passed
+ * to pipe2()) and sized via %IOC_WATCH_QUEUE_SET_SIZE before subscribing.
+ *
+ * Events are delivered as notification records read from the pipe.  The
+ * @watch_id field is embedded in the notification info field and can be used
+ * to distinguish multiple watches sharing a pipe.
+ *
+ * Currently defined event subtypes:
+ *  - %DRM_XE_WATCH_EVENT_VM_ERR - a VM owned by this file has encountered an 
error
+ */
+
+/**
+ * struct drm_xe_watch_queue - subscribe to device event notifications
+ *
+ * Used with %DRM_IOCTL_XE_WATCH_QUEUE.  Notifications are scoped to the
+ * DRM file handle used to issue this IOCTL.
+ */
+struct drm_xe_watch_queue {
+       /** @fd: file descriptor of pipe opened with O_NOTIFICATION_PIPE */
+       __u32 fd;
+
+       /**
+        * @watch_id: identifier (0–255) embedded in the watch notification
+        * info field; allows multiplexing several watches on one pipe
+        */
+       __u32 watch_id;
+
+       /** @flags: must be zero */
+       __u32 flags;
+
+       /** @pad: reserved, must be zero */
+       __u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/uapi/drm/xe_drm_events.h b/include/uapi/drm/xe_drm_events.h
new file mode 100644
index 000000000000..91813548ae01
--- /dev/null
+++ b/include/uapi/drm/xe_drm_events.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _UAPI_XE_DRM_EVENTS_H_
+#define _UAPI_XE_DRM_EVENTS_H_
+
+#include <linux/types.h>
+#include <linux/watch_queue.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/**
+ * enum drm_xe_watch_event - Xe device watch event subtypes
+ *
+ * Subtypes for notifications delivered via %WATCH_TYPE_DRM_XE_NOTIFY when
+ * reading from a pipe subscribed with %DRM_IOCTL_XE_WATCH_QUEUE.
+ */
+enum drm_xe_watch_event {
+       /**
+        * @DRM_XE_WATCH_EVENT_VM_ERR: a VM has encountered an error.
+        *
+        * Indicates that a memory allocation failure occurred within the
+        * given VM.  The vm_id of the affected VM is carried in the
+        * @drm_xe_watch_notification_vm_err::vm_id field of the extended
+        * notification record.
+        */
+       DRM_XE_WATCH_EVENT_VM_ERR = 0,
+};
+
+/**
+ * struct drm_xe_watch_notification_vm_err - VM error event notification
+ *
+ * Notification record delivered for %DRM_XE_WATCH_EVENT_VM_ERR.
+ * The record type is always %WATCH_TYPE_DRM_XE_NOTIFY and the subtype is
+ * %DRM_XE_WATCH_EVENT_VM_ERR.
+ */
+struct drm_xe_watch_notification_vm_err {
+       /** @base: common watch notification header */
+       struct watch_notification base;
+
+       /** @vm_id: ID of the VM that hit out-of-memory */
+       __u32 vm_id;
+
+       /** @error_code: error code describing the error condition (negative 
errno) */
+       __s32 error_code;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _UAPI_XE_DRM_H_ */
-- 
2.53.0

Reply via email to