Module: Mesa
Branch: main
Commit: dea6c8243749e63aa9b517edbfe7489dbfd103a4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=dea6c8243749e63aa9b517edbfe7489dbfd103a4

Author: José Roberto de Souza <[email protected]>
Date:   Thu Dec 14 09:41:22 2023 -0800

intel: Sync xe_drm.h final part

Sync xe_drm.h with commit a8ff56e160bb ("drm/xe/uapi: Remove reset uevent for 
now").

This is the last xe_drm.h uAPI break.

The only relevant change for ANV and Iris is that the VM bind uAPI is
now asynchronous-only, so I had to bring back the syncobj creation,
wait, and destruction.

Making VM binds truly asynchronous is still on the Xe port TODO list.

Signed-off-by: José Roberto de Souza <[email protected]>
Reviewed-by: Paulo Zanoni <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26699>

---

 include/drm-uapi/xe_drm.h                      | 829 ++++++++++++++++---------
 src/gallium/drivers/iris/xe/iris_kmd_backend.c |  31 +-
 src/intel/vulkan/xe/anv_kmd_backend.c          |  30 +-
 3 files changed, 584 insertions(+), 306 deletions(-)
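
For context, the pattern both drivers now use to emulate a synchronous bind
on top of the asynchronous-only VM bind uAPI is: create a syncobj, attach it
as the single out-sync of the bind, wait on it, then destroy it. Below is a
minimal sketch of that flow; it mirrors the iris/anv hunks further down, and
the function name and its arguments are placeholders, not part of the patch:

   /* Illustrative only: emulate a synchronous bind with a syncobj, as the
    * iris/anv changes below do. Name and arguments are placeholders. */
   static int
   xe_vm_bind_sync(int fd, uint32_t vm_id, const struct drm_xe_vm_bind_op *op)
   {
      struct drm_syncobj_create create = {};
      int ret = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
      if (ret)
         return ret;

      struct drm_xe_sync sync = {
         .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
         .flags = DRM_XE_SYNC_FLAG_SIGNAL,
         .handle = create.handle,
      };
      struct drm_xe_vm_bind bind = {
         .vm_id = vm_id,
         .num_binds = 1,
         .bind = *op,
         .num_syncs = 1,
         .syncs = (uintptr_t)&sync,
      };
      ret = intel_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
      if (ret == 0) {
         /* Bind is queued; wait for the out-syncobj to signal completion. */
         struct drm_syncobj_wait wait = {
            .handles = (uintptr_t)&sync.handle,
            .count_handles = 1,
            .timeout_nsec = INT64_MAX,
         };
         ret = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
      }

      struct drm_syncobj_destroy destroy = { .handle = sync.handle };
      intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);
      return ret;
   }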

diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
index 590f7b7af4b..bacdca787c2 100644
--- a/include/drm-uapi/xe_drm.h
+++ b/include/drm-uapi/xe_drm.h
@@ -12,22 +12,113 @@
 extern "C" {
 #endif
 
-/* Please note that modifications to all structs defined here are
+/*
+ * Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
+ * Sections in this file are organized as follows:
+ *   1. IOCTL definition
+ *   2. Extension definition and helper structs
+ *   3. IOCTL's Query structs in the order of the Query's entries.
+ *   4. The rest of IOCTL structs in the order of IOCTL declaration.
  */
 
 /**
- * DOC: uevent generated by xe on it's pci node.
+ * DOC: Xe Device Block Diagram
+ *
+ * The diagram below represents a high-level simplification of a discrete
+ * GPU supported by the Xe driver. It shows some device components which
+ * are necessary to understand this API, as well as how their relations
+ * to each other. This diagram does not represent real hardware::
  *
- * DRM_XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt
- * fails. The value supplied with the event is always "NEEDS_RESET".
- * Additional information supplied is tile id and gt id of the gt unit for
- * which reset has failed.
+ *   ┌──────────────────────────────────────────────────────────────────┐
+ *   │ ┌──────────────────────────────────────────────────┐ ┌─────────┐ │
+ *   │ │        ┌───────────────────────┐   ┌─────┐       │ │ ┌─────┐ │ │
+ *   │ │        │         VRAM0         ├───┤ ... │       │ │ │VRAM1│ │ │
+ *   │ │        └───────────┬───────────┘   └─GT1─┘       │ │ └──┬──┘ │ │
+ *   │ │ ┌──────────────────┴───────────────────────────┐ │ │ ┌──┴──┐ │ │
+ *   │ │ │ ┌─────────────────────┐  ┌─────────────────┐ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │RCS0 │ │BCS0 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │VCS0 │ │VCS1 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │VECS0│ │VECS1│ │ │ │ │ │ ... │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │CCS0 │ │CCS1 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ └─────────DSS─────────┘  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │                          │ │CCS2 │ │CCS3 │ │ │ │ │ │     │ │ │
+ *   │ │ │ ┌─────┐ ┌─────┐ ┌─────┐  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ... │ │ ... │ │ ... │  │                 │ │ │ │ │     │ │ │
+ *   │ │ │ └─DSS─┘ └─DSS─┘ └─DSS─┘  └─────Engines─────┘ │ │ │ │     │ │ │
+ *   │ │ └───────────────────────────GT0────────────────┘ │ │ └─GT2─┘ │ │
+ *   │ └────────────────────────────Tile0─────────────────┘ └─ Tile1──┘ │
+ *   └─────────────────────────────Device0───────┬──────────────────────┘
+ *                                               │
+ *                        ───────────────────────┴────────── PCI bus
  */
-#define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
 
 /**
- * struct xe_user_extension - Base class for defining a chain of extensions
+ * DOC: Xe uAPI Overview
+ *
+ * This section aims to describe the Xe's IOCTL entries, its structs, and other
+ * Xe related uAPI such as uevents and PMU (Platform Monitoring Unit) related
+ * entries and usage.
+ *
+ * List of supported IOCTLs:
+ *  - &DRM_IOCTL_XE_DEVICE_QUERY
+ *  - &DRM_IOCTL_XE_GEM_CREATE
+ *  - &DRM_IOCTL_XE_GEM_MMAP_OFFSET
+ *  - &DRM_IOCTL_XE_VM_CREATE
+ *  - &DRM_IOCTL_XE_VM_DESTROY
+ *  - &DRM_IOCTL_XE_VM_BIND
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
+ *  - &DRM_IOCTL_XE_EXEC
+ *  - &DRM_IOCTL_XE_WAIT_USER_FENCE
+ */
+
+/*
+ * xe specific ioctls.
+ *
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
+ * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
+ * against DRM_COMMAND_BASE and should be between [0x0, 0x60).
+ */
+#define DRM_XE_DEVICE_QUERY            0x00
+#define DRM_XE_GEM_CREATE              0x01
+#define DRM_XE_GEM_MMAP_OFFSET         0x02
+#define DRM_XE_VM_CREATE               0x03
+#define DRM_XE_VM_DESTROY              0x04
+#define DRM_XE_VM_BIND                 0x05
+#define DRM_XE_EXEC_QUEUE_CREATE       0x06
+#define DRM_XE_EXEC_QUEUE_DESTROY      0x07
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08
+#define DRM_XE_EXEC                    0x09
+#define DRM_XE_WAIT_USER_FENCE         0x0a
+/* Must be kept compact -- no holes */
+
+#define DRM_IOCTL_XE_DEVICE_QUERY              DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
+#define DRM_IOCTL_XE_GEM_CREATE                        DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
+#define DRM_IOCTL_XE_GEM_MMAP_OFFSET           DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
+#define DRM_IOCTL_XE_VM_CREATE                 DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
+#define DRM_IOCTL_XE_VM_DESTROY                        DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
+#define DRM_IOCTL_XE_VM_BIND                   DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE         DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
+#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY                DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
+#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY   DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
+#define DRM_IOCTL_XE_EXEC                      DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
+#define DRM_IOCTL_XE_WAIT_USER_FENCE           DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+
+/**
+ * DOC: Xe IOCTL Extensions
+ *
+ * Before detailing the IOCTLs and its structs, it is important to highlight
+ * that every IOCTL in Xe is extensible.
  *
  * Many interfaces need to grow over time. In most cases we can simply
  * extend the struct and have userspace pass in more data. Another option,
@@ -45,29 +136,32 @@ extern "C" {
  *
  * .. code-block:: C
  *
- *     struct xe_user_extension ext3 {
+ *     struct drm_xe_user_extension ext3 {
  *             .next_extension = 0, // end
  *             .name = ...,
  *     };
- *     struct xe_user_extension ext2 {
+ *     struct drm_xe_user_extension ext2 {
  *             .next_extension = (uintptr_t)&ext3,
  *             .name = ...,
  *     };
- *     struct xe_user_extension ext1 {
+ *     struct drm_xe_user_extension ext1 {
  *             .next_extension = (uintptr_t)&ext2,
  *             .name = ...,
  *     };
  *
- * Typically the struct xe_user_extension would be embedded in some uAPI
+ * Typically the struct drm_xe_user_extension would be embedded in some uAPI
  * struct, and in this case we would feed it the head of the chain(i.e ext1),
  * which would then apply all of the above extensions.
- *
+*/
+
+/**
+ * struct drm_xe_user_extension - Base class for defining a chain of extensions
  */
-struct xe_user_extension {
+struct drm_xe_user_extension {
        /**
         * @next_extension:
         *
-        * Pointer to the next struct xe_user_extension, or zero if the end.
+        * Pointer to the next struct drm_xe_user_extension, or zero if the end.
         */
        __u64 next_extension;
 
@@ -78,7 +172,7 @@ struct xe_user_extension {
         *
         * Also note that the name space for this is not global for the whole
         * driver, but rather its scope/meaning is limited to the specific piece
-        * of uAPI which has embedded the struct xe_user_extension.
+        * of uAPI which has embedded the struct drm_xe_user_extension.
         */
        __u32 name;
 
@@ -90,38 +184,28 @@ struct xe_user_extension {
        __u32 pad;
 };
 
-/*
- * xe specific ioctls.
+/**
+ * struct drm_xe_ext_set_property - Generic set property extension
  *
- * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
- * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
- * against DRM_COMMAND_BASE and should be between [0x0, 0x60).
+ * A generic struct that allows any of the Xe's IOCTL to be extended
+ * with a set_property operation.
  */
-#define DRM_XE_DEVICE_QUERY            0x00
-#define DRM_XE_GEM_CREATE              0x01
-#define DRM_XE_GEM_MMAP_OFFSET         0x02
-#define DRM_XE_VM_CREATE               0x03
-#define DRM_XE_VM_DESTROY              0x04
-#define DRM_XE_VM_BIND                 0x05
-#define DRM_XE_EXEC_QUEUE_CREATE       0x06
-#define DRM_XE_EXEC_QUEUE_DESTROY      0x07
-#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08
-#define DRM_XE_EXEC                    0x09
-#define DRM_XE_WAIT_USER_FENCE         0x0a
-/* Must be kept compact -- no holes */
+struct drm_xe_ext_set_property {
+       /** @base: base user extension */
+       struct drm_xe_user_extension base;
 
-#define DRM_IOCTL_XE_DEVICE_QUERY              DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
-#define DRM_IOCTL_XE_GEM_CREATE                        DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
-#define DRM_IOCTL_XE_GEM_MMAP_OFFSET           DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
-#define DRM_IOCTL_XE_VM_CREATE                 DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
-#define DRM_IOCTL_XE_VM_DESTROY                        DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
-#define DRM_IOCTL_XE_VM_BIND                   DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
-#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE         DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
-#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY                DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
-#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY   DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
-#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY   DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
-#define DRM_IOCTL_XE_EXEC                      DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
-#define DRM_IOCTL_XE_WAIT_USER_FENCE           DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+       /** @property: property to set */
+       __u32 property;
+
+       /** @pad: MBZ */
+       __u32 pad;
+
+       /** @value: property value */
+       __u64 value;
+
+       /** @reserved: Reserved */
+       __u64 reserved[2];
+};
 
 /**
  * struct drm_xe_engine_class_instance - instance of an engine class
@@ -130,6 +214,15 @@ struct xe_user_extension {
  * the input of engine selection for both @drm_xe_exec_queue_create and
  * @drm_xe_query_engine_cycles
  *
+ * The @engine_class can be:
+ *  - %DRM_XE_ENGINE_CLASS_RENDER
+ *  - %DRM_XE_ENGINE_CLASS_COPY
+ *  - %DRM_XE_ENGINE_CLASS_VIDEO_DECODE
+ *  - %DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE
+ *  - %DRM_XE_ENGINE_CLASS_COMPUTE
+ *  - %DRM_XE_ENGINE_CLASS_VM_BIND - Kernel only classes (not actual
+ *    hardware engine class). Used for creating ordered queues of VM
+ *    bind operations.
  */
 struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_RENDER             0
@@ -137,15 +230,12 @@ struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_VIDEO_DECODE       2
 #define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE      3
 #define DRM_XE_ENGINE_CLASS_COMPUTE            4
-       /*
-        * Kernel only classes (not actual hardware engine class). Used for
-        * creating ordered queues of VM bind operations.
-        */
-#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC      5
-#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC       6
+#define DRM_XE_ENGINE_CLASS_VM_BIND            5
+       /** @engine_class: engine class id */
        __u16 engine_class;
-
+       /** @engine_instance: engine instance id */
        __u16 engine_instance;
+       /** @gt_id: Unique ID of this GT within the PCI Device */
        __u16 gt_id;
        /** @pad: MBZ */
        __u16 pad;
@@ -204,10 +294,9 @@ struct drm_xe_mem_region {
         */
        __u16 mem_class;
        /**
-        * @instance: The instance for this region.
-        *
-        * The @mem_class and @instance taken together will always give
-        * a unique pair.
+        * @instance: The unique ID for this region, which serves as the
+        * index in the placement bitmask used as argument for
+        * &DRM_IOCTL_XE_GEM_CREATE
         */
        __u16 instance;
        /**
@@ -265,61 +354,10 @@ struct drm_xe_mem_region {
         * here will always be zero).
         */
        __u64 cpu_visible_used;
-       /** @reserved: MBZ */
+       /** @reserved: Reserved */
        __u64 reserved[6];
 };
 
-/**
- * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
- *
- * If a query is made with a struct drm_xe_device_query where .query is equal to
- * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
- * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
- * .data points to this allocated structure.
- *
- * The query returns the engine cycles, which along with GT's @reference_clock,
- * can be used to calculate the engine timestamp. In addition the
- * query returns a set of cpu timestamps that indicate when the command
- * streamer cycle count was captured.
- */
-struct drm_xe_query_engine_cycles {
-       /**
-        * @eci: This is input by the user and is the engine for which command
-        * streamer cycles is queried.
-        */
-       struct drm_xe_engine_class_instance eci;
-
-       /**
-        * @clockid: This is input by the user and is the reference clock id for
-        * CPU timestamp. For definition, see clock_gettime(2) and
-        * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
-        * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
-        */
-       __s32 clockid;
-
-       /** @width: Width of the engine cycle counter in bits. */
-       __u32 width;
-
-       /**
-        * @engine_cycles: Engine cycles as read from its register
-        * at 0x358 offset.
-        */
-       __u64 engine_cycles;
-
-       /**
-        * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
-        * reading the engine_cycles register using the reference clockid set by the
-        * user.
-        */
-       __u64 cpu_timestamp;
-
-       /**
-        * @cpu_delta: Time delta in ns captured around reading the lower dword
-        * of the engine_cycles register.
-        */
-       __u64 cpu_delta;
-};
-
 /**
  * struct drm_xe_query_mem_regions - describe memory regions
  *
@@ -343,6 +381,19 @@ struct drm_xe_query_mem_regions {
  * is equal to DRM_XE_DEVICE_QUERY_CONFIG, then the reply uses
  * struct drm_xe_query_config in .data.
  *
+ * The index in @info can be:
+ *  - %DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID - Device ID (lower 16 bits)
+ *    and the device revision (next 8 bits)
+ *  - %DRM_XE_QUERY_CONFIG_FLAGS - Flags describing the device
+ *    configuration, see list below
+ *
+ *    - %DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM - Flag is set if the device
+ *      has usable VRAM
+ *  - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
+ *    required by this device, typically SZ_4K or SZ_64K
+ *  - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
+ *  - %DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY - Value of the highest
+ *    available exec queue priority
  */
 struct drm_xe_query_config {
        /** @num_params: number of parameters returned in info */
@@ -368,6 +419,10 @@ struct drm_xe_query_config {
  * existing GT individual descriptions.
  * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for
  * implementing graphics and/or media operations.
+ *
+ * The index in @type can be:
+ *  - %DRM_XE_QUERY_GT_TYPE_MAIN
+ *  - %DRM_XE_QUERY_GT_TYPE_MEDIA
  */
 struct drm_xe_gt {
 #define DRM_XE_QUERY_GT_TYPE_MAIN              0
@@ -386,6 +441,10 @@ struct drm_xe_gt {
         * @near_mem_regions: Bit mask of instances from
         * drm_xe_query_mem_regions that are nearest to the current engines
         * of this GT.
+        * Each index in this mask refers directly to the struct
+        * drm_xe_query_mem_regions' instance, no assumptions should
+        * be made about order. The type of each region is described
+        * by struct drm_xe_query_mem_regions' mem_class.
         */
        __u64 near_mem_regions;
        /**
@@ -394,6 +453,10 @@ struct drm_xe_gt {
         * In general, they have extra indirections when compared to the
         * @near_mem_regions. For a discrete device this could mean system
         * memory and memory living in a different tile.
+        * Each index in this mask refers directly to the struct
+        * drm_xe_query_mem_regions' instance, no assumptions should
+        * be made about order. The type of each region is described
+        * by struct drm_xe_query_mem_regions' mem_class.
         */
        __u64 far_mem_regions;
        /** @reserved: Reserved */
@@ -425,34 +488,30 @@ struct drm_xe_query_gt_list {
  * If a query is made with a struct drm_xe_device_query where .query
  * is equal to DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, then the reply uses
  * struct drm_xe_query_topology_mask in .data.
+ *
+ * The @type can be:
+ *  - %DRM_XE_TOPO_DSS_GEOMETRY - To query the mask of Dual Sub Slices
+ *    (DSS) available for geometry operations. For example a query response
+ *    containing the following in mask:
+ *    ``DSS_GEOMETRY    ff ff ff ff 00 00 00 00``
+ *    means 32 DSS are available for geometry.
+ *  - %DRM_XE_TOPO_DSS_COMPUTE - To query the mask of Dual Sub Slices
+ *    (DSS) available for compute operations. For example a query response
+ *    containing the following in mask:
+ *    ``DSS_COMPUTE    ff ff ff ff 00 00 00 00``
+ *    means 32 DSS are available for compute.
+ *  - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU)
+ *    available per Dual Sub Slices (DSS). For example a query response
+ *    containing the following in mask:
+ *    ``EU_PER_DSS    ff ff 00 00 00 00 00 00``
+ *    means each DSS has 16 EU.
  */
 struct drm_xe_query_topology_mask {
        /** @gt_id: GT ID the mask is associated with */
        __u16 gt_id;
 
-       /*
-        * To query the mask of Dual Sub Slices (DSS) available for geometry
-        * operations. For example a query response containing the following
-        * in mask:
-        *   DSS_GEOMETRY    ff ff ff ff 00 00 00 00
-        * means 32 DSS are available for geometry.
-        */
 #define DRM_XE_TOPO_DSS_GEOMETRY       (1 << 0)
-       /*
-        * To query the mask of Dual Sub Slices (DSS) available for compute
-        * operations. For example a query response containing the following
-        * in mask:
-        *   DSS_COMPUTE    ff ff ff ff 00 00 00 00
-        * means 32 DSS are available for compute.
-        */
 #define DRM_XE_TOPO_DSS_COMPUTE                (1 << 1)
-       /*
-        * To query the mask of Execution Units (EU) available per Dual Sub
-        * Slices (DSS). For example a query response containing the following
-        * in mask:
-        *   EU_PER_DSS    ff ff 00 00 00 00 00 00
-        * means each DSS has 16 EU.
-        */
 #define DRM_XE_TOPO_EU_PER_DSS         (1 << 2)
        /** @type: type of mask */
        __u16 type;
@@ -465,11 +524,81 @@ struct drm_xe_query_topology_mask {
 };
 
 /**
- * struct drm_xe_device_query - main structure to query device information
+ * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
+ *
+ * If a query is made with a struct drm_xe_device_query where .query is equal to
+ * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
+ * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
+ * .data points to this allocated structure.
+ *
+ * The query returns the engine cycles, which along with GT's @reference_clock,
+ * can be used to calculate the engine timestamp. In addition the
+ * query returns a set of cpu timestamps that indicate when the command
+ * streamer cycle count was captured.
+ */
+struct drm_xe_query_engine_cycles {
+       /**
+        * @eci: This is input by the user and is the engine for which command
+        * streamer cycles is queried.
+        */
+       struct drm_xe_engine_class_instance eci;
+
+       /**
+        * @clockid: This is input by the user and is the reference clock id for
+        * CPU timestamp. For definition, see clock_gettime(2) and
+        * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
+        * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
+        */
+       __s32 clockid;
+
+       /** @width: Width of the engine cycle counter in bits. */
+       __u32 width;
+
+       /**
+        * @engine_cycles: Engine cycles as read from its register
+        * at 0x358 offset.
+        */
+       __u64 engine_cycles;
+
+       /**
+        * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
+        * reading the engine_cycles register using the reference clockid set 
by the
+        * user.
+        */
+       __u64 cpu_timestamp;
+
+       /**
+        * @cpu_delta: Time delta in ns captured around reading the lower dword
+        * of the engine_cycles register.
+        */
+       __u64 cpu_delta;
+};
+
+/**
+ * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main
+ * structure to query device information
+ *
+ * The user selects the type of data to query among DRM_XE_DEVICE_QUERY_*
+ * and sets the value in the query member. This determines the type of
+ * the structure provided by the driver in data, among struct drm_xe_query_*.
  *
- * If size is set to 0, the driver fills it with the required size for the
- * requested type of data to query. If size is equal to the required size,
- * the queried information is copied into data.
+ * The @query can be:
+ *  - %DRM_XE_DEVICE_QUERY_ENGINES
+ *  - %DRM_XE_DEVICE_QUERY_MEM_REGIONS
+ *  - %DRM_XE_DEVICE_QUERY_CONFIG
+ *  - %DRM_XE_DEVICE_QUERY_GT_LIST
+ *  - %DRM_XE_DEVICE_QUERY_HWCONFIG - Query type to retrieve the hardware
+ *    configuration of the device such as information on slices, memory,
+ *    caches, and so on. It is provided as a table of key / value
+ *    attributes.
+ *  - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY
+ *  - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES
+ *
+ * If size is set to 0, the driver fills it with the required size for
+ * the requested type of data to query. If size is equal to the required
+ * size, the queried information is copied into data. If size is set to
+ * a value different from 0 and different from the required size, the
+ * IOCTL call returns -EINVAL.
  *
  * For example the following code snippet allows retrieving and printing
  * information about the device engines with DRM_XE_DEVICE_QUERY_ENGINES:
@@ -527,6 +656,37 @@ struct drm_xe_device_query {
        __u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_gem_create - Input of &DRM_IOCTL_XE_GEM_CREATE - A structure for
+ * gem creation
+ *
+ * The @flags can be:
+ *  - %DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING
+ *  - %DRM_XE_GEM_CREATE_FLAG_SCANOUT
+ *  - %DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM - When using VRAM as a
+ *    possible placement, ensure that the corresponding VRAM allocation
+ *    will always use the CPU accessible part of VRAM. This is important
+ *    for small-bar systems (on full-bar systems this gets turned into a
+ *    noop).
+ *    Note1: System memory can be used as an extra placement if the kernel
+ *    should spill the allocation to system memory, if space can't be made
+ *    available in the CPU accessible part of VRAM (giving the same
+ *    behaviour as the i915 interface, see
+ *    I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS).
+ *    Note2: For clear-color CCS surfaces the kernel needs to read the
+ *    clear-color value stored in the buffer, and on discrete platforms we
+ *    need to use VRAM for display surfaces, therefore the kernel requires
+ *    setting this flag for such objects, otherwise an error is thrown on
+ *    small-bar systems.
+ *
+ * @cpu_caching supports the following values:
+ *  - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back
+ *    caching. On iGPU this can't be used for scanout surfaces. Currently
+ *    not allowed for objects placed in VRAM.
+ *  - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This
+ *    is uncached. Scanout surfaces should likely use this. All objects
+ *    that can be placed in VRAM must use this.
+ */
 struct drm_xe_gem_create {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -537,26 +697,17 @@ struct drm_xe_gem_create {
         */
        __u64 size;
 
-       /** @placement: A mask of memory instances of where BO can be placed. */
+       /**
+        * @placement: A mask of memory instances of where BO can be placed.
+        * Each index in this mask refers directly to the struct
+        * drm_xe_query_mem_regions' instance, no assumptions should
+        * be made about order. The type of each region is described
+        * by struct drm_xe_query_mem_regions' mem_class.
+        */
        __u32 placement;
 
 #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING           (1 << 0)
 #define DRM_XE_GEM_CREATE_FLAG_SCANOUT                 (1 << 1)
-/*
- * When using VRAM as a possible placement, ensure that the corresponding VRAM
- * allocation will always use the CPU accessible part of VRAM. This is important
- * for small-bar systems (on full-bar systems this gets turned into a noop).
- *
- * Note: System memory can be used as an extra placement if the kernel should
- * spill the allocation to system memory, if space can't be made available in
- * the CPU accessible part of VRAM (giving the same behaviour as the i915
- * interface, see I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS).
- *
- * Note: For clear-color CCS surfaces the kernel needs to read the clear-color
- * value stored in the buffer, and on discrete platforms we need to use VRAM for
- * display surfaces, therefore the kernel requires setting this flag for such
- * objects, otherwise an error is thrown on small-bar systems.
- */
 #define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM      (1 << 2)
        /**
         * @flags: Flags, currently a mask of memory instances of where BO can
@@ -581,22 +732,12 @@ struct drm_xe_gem_create {
         */
        __u32 handle;
 
+#define DRM_XE_GEM_CPU_CACHING_WB                      1
+#define DRM_XE_GEM_CPU_CACHING_WC                      2
        /**
         * @cpu_caching: The CPU caching mode to select for this object. If
         * mmaping the object the mode selected here will also be used.
-        *
-        * Supported values:
-        *
-        * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
-        * caching.  On iGPU this can't be used for scanout surfaces. Currently
-        * not allowed for objects placed in VRAM.
-        *
-        * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
-        * is uncached. Scanout surfaces should likely use this. All objects
-        * that can be placed in VRAM must use this.
         */
-#define DRM_XE_GEM_CPU_CACHING_WB                      1
-#define DRM_XE_GEM_CPU_CACHING_WC                      2
        __u16 cpu_caching;
        /** @pad: MBZ */
        __u16 pad[3];
@@ -605,6 +746,9 @@ struct drm_xe_gem_create {
        __u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET
+ */
 struct drm_xe_gem_mmap_offset {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -622,53 +766,36 @@ struct drm_xe_gem_mmap_offset {
        __u64 reserved[2];
 };
 
-/** struct drm_xe_ext_set_property - XE set property extension */
-struct drm_xe_ext_set_property {
-       /** @base: base user extension */
-       struct xe_user_extension base;
-
-       /** @property: property to set */
-       __u32 property;
-
-       /** @pad: MBZ */
-       __u32 pad;
-
-       /** @value: property value */
-       __u64 value;
-
-       /** @reserved: Reserved */
-       __u64 reserved[2];
-};
-
+/**
+ * struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE
+ *
+ * The @flags can be:
+ *  - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE
+ *  - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts
+ *    exec submissions to its exec_queues that don't have an upper time
+ *    limit on the job execution time. But exec submissions to these
+ *    don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ,
+ *    DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF,
+ *    used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL.
+ *    LR VMs can be created in recoverable page-fault mode using
+ *    DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it.
+ *    If that flag is omitted, the UMD can not rely on the slightly
+ *    different per-VM overcommit semantics that are enabled by
+ *    DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may
+ *    still enable recoverable pagefaults if supported by the device.
+ *  - %DRM_XE_VM_CREATE_FLAG_FAULT_MODE - Requires also
+ *    DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated on
+ *    demand when accessed, and also allows per-VM overcommit of memory.
+ *    The xe driver internally uses recoverable pagefaults to implement
+ *    this.
+ */
 struct drm_xe_vm_create {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
 
 #define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE     (1 << 0)
-       /*
-        * An LR, or Long Running VM accepts exec submissions
-        * to its exec_queues that don't have an upper time limit on
-        * the job execution time. But exec submissions to these
-        * don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ,
-        * DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF,
-        * used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL.
-        * LR VMs can be created in recoverable page-fault mode using
-        * DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it.
-        * If that flag is omitted, the UMD can not rely on the slightly
-        * different per-VM overcommit semantics that are enabled by
-        * DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may
-        * still enable recoverable pagefaults if supported by the device.
-        */
 #define DRM_XE_VM_CREATE_FLAG_LR_MODE          (1 << 1)
-#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT    (1 << 2)
-       /*
-        * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also
-        * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated
-        * on demand when accessed, and also allows per-VM overcommit of memory.
-        * The xe driver internally uses recoverable pagefaults to implement
-        * this.
-        */
-#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE       (1 << 3)
+#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE       (1 << 2)
        /** @flags: Flags */
        __u32 flags;
 
@@ -679,6 +806,9 @@ struct drm_xe_vm_create {
        __u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_vm_destroy - Input of &DRM_IOCTL_XE_VM_DESTROY
+ */
 struct drm_xe_vm_destroy {
        /** @vm_id: VM ID */
        __u32 vm_id;
@@ -690,6 +820,29 @@ struct drm_xe_vm_destroy {
        __u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_vm_bind_op - run bind operations
+ *
+ * The @op can be:
+ *  - %DRM_XE_VM_BIND_OP_MAP
+ *  - %DRM_XE_VM_BIND_OP_UNMAP
+ *  - %DRM_XE_VM_BIND_OP_MAP_USERPTR
+ *  - %DRM_XE_VM_BIND_OP_UNMAP_ALL
+ *  - %DRM_XE_VM_BIND_OP_PREFETCH
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_VM_BIND_FLAG_READONLY
+ *  - %DRM_XE_VM_BIND_FLAG_ASYNC
+ *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
+ *    MAP operation immediately rather than deferring the MAP to the page
+ *    fault handler.
+ *  - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page
+ *    tables are setup with a special bit which indicates writes are
+ *    dropped and all reads return zero. In the future, the NULL flags
+ *    will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
+ *    handle MBZ, and the BO offset MBZ. This flag is intended to
+ *    implement VK sparse bindings.
+ */
 struct drm_xe_vm_bind_op {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -736,6 +889,12 @@ struct drm_xe_vm_bind_op {
         *
         * Note: For userptr and externally imported dma-buf the kernel expects
         * either 1WAY or 2WAY for the @pat_index.
+        *
+        * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions
+        * on the @pat_index. For such mappings there is no actual memory being
+        * mapped (the address in the PTE is invalid), so the various PAT memory
+        * attributes likely do not apply.  Simply leaving as zero is one
+        * option (still a valid pat_index).
         */
        __u16 pat_index;
 
@@ -770,20 +929,8 @@ struct drm_xe_vm_bind_op {
        __u32 op;
 
 #define DRM_XE_VM_BIND_FLAG_READONLY   (1 << 0)
-#define DRM_XE_VM_BIND_FLAG_ASYNC      (1 << 1)
-       /*
-        * Valid on a faulting VM only, do the MAP operation immediately rather
-        * than deferring the MAP to the page fault handler.
-        */
-#define DRM_XE_VM_BIND_FLAG_IMMEDIATE  (1 << 2)
-       /*
-        * When the NULL flag is set, the page tables are setup with a special
-        * bit which indicates writes are dropped and all reads return zero.  In
-        * the future, the NULL flags will only be valid for DRM_XE_VM_BIND_OP_MAP
-        * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
-        * intended to implement VK sparse bindings.
-        */
-#define DRM_XE_VM_BIND_FLAG_NULL       (1 << 3)
+#define DRM_XE_VM_BIND_FLAG_IMMEDIATE  (1 << 1)
+#define DRM_XE_VM_BIND_FLAG_NULL       (1 << 2)
        /** @flags: Bind flags */
        __u32 flags;
 
@@ -794,13 +941,40 @@ struct drm_xe_vm_bind_op {
         */
        __u32 prefetch_mem_region_instance;
 
-       /** @pad: MBZ */
+       /** @pad2: MBZ */
        __u32 pad2;
 
        /** @reserved: Reserved */
        __u64 reserved[3];
 };
 
+/**
+ * struct drm_xe_vm_bind - Input of &DRM_IOCTL_XE_VM_BIND
+ *
+ * Below is an example of a minimal use of @drm_xe_vm_bind to
+ * asynchronously bind the buffer `data` at address `BIND_ADDRESS` to
+ * illustrate `userptr`. It can be synchronized by using the example
+ * provided for @drm_xe_sync.
+ *
+ * .. code-block:: C
+ *
+ *     data = aligned_alloc(ALIGNMENT, BO_SIZE);
+ *     struct drm_xe_vm_bind bind = {
+ *         .vm_id = vm,
+ *         .num_binds = 1,
+ *         .bind.obj = 0,
+ *         .bind.obj_offset = to_user_pointer(data),
+ *         .bind.range = BO_SIZE,
+ *         .bind.addr = BIND_ADDRESS,
+ *         .bind.op = DRM_XE_VM_BIND_OP_MAP_USERPTR,
+ *         .bind.flags = 0,
+ *         .num_syncs = 1,
+ *         .syncs = &sync,
+ *         .exec_queue_id = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
+ *
+ */
 struct drm_xe_vm_bind {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -832,7 +1006,7 @@ struct drm_xe_vm_bind {
                __u64 vector_of_binds;
        };
 
-       /** @pad: MBZ */
+       /** @pad2: MBZ */
        __u32 pad2;
 
        /** @num_syncs: amount of syncs to wait on */
@@ -845,20 +1019,28 @@ struct drm_xe_vm_bind {
        __u64 reserved[2];
 };
 
-/* For use with DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */
-
-/* Monitor 128KB contiguous region with 4K sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_128K 0
-
-/* Monitor 2MB contiguous region with 64KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_2M 1
-
-/* Monitor 16MB contiguous region with 512KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_16M 2
-
-/* Monitor 64MB contiguous region with 2M sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_64M 3
-
+/**
+ * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
+ *
+ * The example below shows how to use @drm_xe_exec_queue_create to create
+ * a simple exec_queue (no parallel submission) of class
+ * &DRM_XE_ENGINE_CLASS_RENDER.
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_engine_class_instance instance = {
+ *         .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
+ *     };
+ *     struct drm_xe_exec_queue_create exec_queue_create = {
+ *          .extensions = 0,
+ *          .vm_id = vm,
+ *          .num_bb_per_exec = 1,
+ *          .num_eng_per_bb = 1,
+ *          .instances = to_user_pointer(&instance),
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create);
+ *
+ */
 struct drm_xe_exec_queue_create {
 #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY              0
@@ -869,6 +1051,14 @@ struct drm_xe_exec_queue_create {
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER           5
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY            6
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY       7
+/* Monitor 128KB contiguous region with 4K sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_128K                                0
+/* Monitor 2MB contiguous region with 64KB sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_2M                          1
+/* Monitor 16MB contiguous region with 512KB sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_16M                         2
+/* Monitor 64MB contiguous region with 2M sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_64M                         3
 
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -901,6 +1091,26 @@ struct drm_xe_exec_queue_create {
        __u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_exec_queue_destroy - Input of &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
+ */
+struct drm_xe_exec_queue_destroy {
+       /** @exec_queue_id: Exec queue ID */
+       __u32 exec_queue_id;
+
+       /** @pad: MBZ */
+       __u32 pad;
+
+       /** @reserved: Reserved */
+       __u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_exec_queue_get_property - Input of &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
+ *
+ * The @property can be:
+ *  - %DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN
+ */
 struct drm_xe_exec_queue_get_property {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -919,17 +1129,41 @@ struct drm_xe_exec_queue_get_property {
        __u64 reserved[2];
 };
 
-struct drm_xe_exec_queue_destroy {
-       /** @exec_queue_id: Exec queue ID */
-       __u32 exec_queue_id;
-
-       /** @pad: MBZ */
-       __u32 pad;
-
-       /** @reserved: Reserved */
-       __u64 reserved[2];
-};
-
+/**
+ * struct drm_xe_sync - sync object
+ *
+ * The @type can be:
+ *  - %DRM_XE_SYNC_TYPE_SYNCOBJ
+ *  - %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ
+ *  - %DRM_XE_SYNC_TYPE_USER_FENCE
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_SYNC_FLAG_SIGNAL
+ *
+ * A minimal use of @drm_xe_sync looks like this:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_sync sync = {
+ *         .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ *         .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+ *     };
+ *     struct drm_syncobj_create syncobj_create = { 0 };
+ *     ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create);
+ *     sync.handle = syncobj_create.handle;
+ *         ...
+ *         use of &sync in drm_xe_exec or drm_xe_vm_bind
+ *         ...
+ *     struct drm_syncobj_wait wait = {
+ *         .handles = &sync.handle,
+ *         .timeout_nsec = INT64_MAX,
+ *         .count_handles = 1,
+ *         .flags = 0,
+ *         .first_signaled = 0,
+ *         .pad = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
+ */
 struct drm_xe_sync {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -945,11 +1179,12 @@ struct drm_xe_sync {
        __u32 flags;
 
        union {
+               /** @handle: Handle for the object */
                __u32 handle;
 
                /**
-                * @addr: Address of user fence. When sync passed in via exec
-                * IOCTL this a GPU address in the VM. When sync passed in via
+                * @addr: Address of user fence. When sync is passed in via exec
+                * IOCTL this is a GPU address in the VM. When sync passed in via
                 * VM bind IOCTL this is a user pointer. In either case, it is
                 * the users responsibility that this address is present and
                 * mapped when the user fence is signalled. Must be qword
@@ -958,12 +1193,36 @@ struct drm_xe_sync {
                __u64 addr;
        };
 
+       /**
+        * @timeline_value: Input for the timeline sync object. Needs to be
+        * different than 0 when used with %DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ.
+        */
        __u64 timeline_value;
 
        /** @reserved: Reserved */
        __u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_exec - Input of &DRM_IOCTL_XE_EXEC
+ *
+ * This is an example to use @drm_xe_exec for execution of the object
+ * at BIND_ADDRESS (see example in @drm_xe_vm_bind) by an exec_queue
+ * (see example in @drm_xe_exec_queue_create). It can be synchronized
+ * by using the example provided for @drm_xe_sync.
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_exec exec = {
+ *         .exec_queue_id = exec_queue,
+ *         .syncs = &sync,
+ *         .num_syncs = 1,
+ *         .address = BIND_ADDRESS,
+ *         .num_batch_buffer = 1,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
+ *
+ */
 struct drm_xe_exec {
        /** @extensions: Pointer to the first extension struct, if any */
        __u64 extensions;
@@ -997,7 +1256,7 @@ struct drm_xe_exec {
 };
 
 /**
- * struct drm_xe_wait_user_fence - wait user fence
+ * struct drm_xe_wait_user_fence - Input of &DRM_IOCTL_XE_WAIT_USER_FENCE
  *
  * Wait on user fence, XE will wake-up on every HW engine interrupt in the
  * instances list and check if user fence is complete::
@@ -1005,6 +1264,24 @@ struct drm_xe_exec {
  *     (*addr & MASK) OP (VALUE & MASK)
  *
  * Returns to user on user fence completion or timeout.
+ *
+ * The @op can be:
+ *  - %DRM_XE_UFENCE_WAIT_OP_EQ
+ *  - %DRM_XE_UFENCE_WAIT_OP_NEQ
+ *  - %DRM_XE_UFENCE_WAIT_OP_GT
+ *  - %DRM_XE_UFENCE_WAIT_OP_GTE
+ *  - %DRM_XE_UFENCE_WAIT_OP_LT
+ *  - %DRM_XE_UFENCE_WAIT_OP_LTE
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
+ *  - %DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP
+ *
+ * The @mask values can be for example:
+ *  - 0xffu for u8
+ *  - 0xffffu for u16
+ *  - 0xffffffffu for u32
+ *  - 0xffffffffffffffffu for u64
  */
 struct drm_xe_wait_user_fence {
        /** @extensions: Pointer to the first extension struct, if any */
@@ -1024,8 +1301,7 @@ struct drm_xe_wait_user_fence {
        /** @op: wait operation (type of comparison) */
        __u16 op;
 
-#define DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP        (1 << 0)        /* e.g. Wait on VM bind */
-#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME        (1 << 1)
+#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME        (1 << 0)
        /** @flags: wait flags */
        __u16 flags;
 
@@ -1035,10 +1311,6 @@ struct drm_xe_wait_user_fence {
        /** @value: compare value */
        __u64 value;
 
-#define DRM_XE_UFENCE_WAIT_MASK_U8     0xffu
-#define DRM_XE_UFENCE_WAIT_MASK_U16    0xffffu
-#define DRM_XE_UFENCE_WAIT_MASK_U32    0xffffffffu
-#define DRM_XE_UFENCE_WAIT_MASK_U64    0xffffffffffffffffu
        /** @mask: comparison mask */
        __u64 mask;
 
@@ -1058,61 +1330,16 @@ struct drm_xe_wait_user_fence {
         */
        __s64 timeout;
 
-       /**
-        * @num_engines: number of engine instances to wait on, must be zero
-        * when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
-        */
-       __u64 num_engines;
+       /** @exec_queue_id: exec_queue_id returned from xe_exec_queue_create_ioctl */
+       __u32 exec_queue_id;
 
-       /**
-        * @instances: user pointer to array of drm_xe_engine_class_instance to
-        * wait on, must be NULL when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
-        */
-       __u64 instances;
+       /** @pad2: MBZ */
+       __u32 pad2;
 
        /** @reserved: Reserved */
        __u64 reserved[2];
 };
 
-/**
- * DOC: XE PMU event config IDs
- *
- * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
- * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
- * particular event.
- *
- * For example to open the DRMXE_PMU_RENDER_GROUP_BUSY(0):
- *
- * .. code-block:: C
- *
- *     struct perf_event_attr attr;
- *     long long count;
- *     int cpu = 0;
- *     int fd;
- *
- *     memset(&attr, 0, sizeof(struct perf_event_attr));
- *     attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_56_00.0/type
- *     attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
- *     attr.use_clockid = 1;
- *     attr.clockid = CLOCK_MONOTONIC;
- *     attr.config = DRM_XE_PMU_RENDER_GROUP_BUSY(0);
- *
- *     fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
- */
-
-/*
- * Top bits of every counter are GT id.
- */
-#define __DRM_XE_PMU_GT_SHIFT (56)
-
-#define ___DRM_XE_PMU_OTHER(gt, x) \
-       (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
-
-#define DRM_XE_PMU_RENDER_GROUP_BUSY(gt)       ___DRM_XE_PMU_OTHER(gt, 0)
-#define DRM_XE_PMU_COPY_GROUP_BUSY(gt)         ___DRM_XE_PMU_OTHER(gt, 1)
-#define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt)                ___DRM_XE_PMU_OTHER(gt, 2)
-#define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)   ___DRM_XE_PMU_OTHER(gt, 3)
-
 #if defined(__cplusplus)
 }
 #endif
diff --git a/src/gallium/drivers/iris/xe/iris_kmd_backend.c b/src/gallium/drivers/iris/xe/iris_kmd_backend.c
index 4c4d731ca6a..2f49c4d8bea 100644
--- a/src/gallium/drivers/iris/xe/iris_kmd_backend.c
+++ b/src/gallium/drivers/iris/xe/iris_kmd_backend.c
@@ -106,8 +106,25 @@ xe_gem_vm_bind_op(struct iris_bo *bo, uint32_t op)
 {
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bo->bufmgr);
    uint32_t handle = op == DRM_XE_VM_BIND_OP_UNMAP ? 0 : bo->gem_handle;
+   struct drm_xe_sync xe_sync = {
+      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+   };
+   struct drm_syncobj_create syncobj_create = {};
+   struct drm_syncobj_destroy syncobj_destroy = {};
+   struct drm_syncobj_wait syncobj_wait = {
+      .timeout_nsec = INT64_MAX,
+      .count_handles = 1,
+   };
    uint64_t range, obj_offset = 0;
-   int ret;
+   int ret, fd;
+
+   fd = iris_bufmgr_get_fd(bo->bufmgr);
+   ret = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create);
+   if (ret)
+      return ret;
+
+   xe_sync.handle = syncobj_create.handle;
 
    if (iris_bo_is_imported(bo))
       range = bo->size;
@@ -127,6 +144,8 @@ xe_gem_vm_bind_op(struct iris_bo *bo, uint32_t op)
 
    struct drm_xe_vm_bind args = {
       .vm_id = iris_bufmgr_get_global_vm_id(bo->bufmgr),
+      .num_syncs = 1,
+      .syncs = (uintptr_t)&xe_sync,
       .num_binds = 1,
       .bind.obj = handle,
       .bind.obj_offset = obj_offset,
@@ -135,11 +154,17 @@ xe_gem_vm_bind_op(struct iris_bo *bo, uint32_t op)
       .bind.op = op,
       .bind.pat_index = pat_index,
    };
-   ret = intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_XE_VM_BIND, &args);
-   if (ret) {
+   ret = intel_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &args);
+   if (ret == 0) {
+      syncobj_wait.handles = (uintptr_t)&xe_sync.handle;
+      ret = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &syncobj_wait);
+   } else {
       DBG("vm_bind_op: DRM_IOCTL_XE_VM_BIND failed(%i)", ret);
    }
 
+   syncobj_destroy.handle = xe_sync.handle;
+   intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);
+
    return ret;
 }
 
diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c
index db0957dd2f4..9124c04f82a 100644
--- a/src/intel/vulkan/xe/anv_kmd_backend.c
+++ b/src/intel/vulkan/xe/anv_kmd_backend.c
@@ -116,13 +116,24 @@ static inline int
 xe_vm_bind_op(struct anv_device *device,
               struct anv_sparse_submission *submit)
 {
-   int ret;
-
+   struct drm_xe_sync xe_sync = {
+      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+   };
    struct drm_xe_vm_bind args = {
       .vm_id = device->vm_id,
       .num_binds = submit->binds_len,
       .bind = {},
+      .num_syncs = 1,
+      .syncs = (uintptr_t)&xe_sync,
+   };
+   struct drm_syncobj_create syncobj_create = {};
+   struct drm_syncobj_destroy syncobj_destroy = {};
+   struct drm_syncobj_wait syncobj_wait = {
+      .timeout_nsec = INT64_MAX,
+      .count_handles = 1,
    };
+   int ret;
 
    STACK_ARRAY(struct drm_xe_vm_bind_op, xe_binds_stackarray,
                submit->binds_len);
@@ -173,7 +184,22 @@ xe_vm_bind_op(struct anv_device *device,
          xe_bind->userptr = (uintptr_t)bo->map;
    }
 
+   ret = intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create);
+   if (ret)
+      goto out_stackarray;
+
+   xe_sync.handle = syncobj_create.handle;
    ret = intel_ioctl(device->fd, DRM_IOCTL_XE_VM_BIND, &args);
+   if (ret)
+      goto out_destroy_syncobj;
+
+   syncobj_wait.handles = (uintptr_t)&xe_sync.handle;
+   ret = intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &syncobj_wait);
+
+out_destroy_syncobj:
+   syncobj_destroy.handle = xe_sync.handle;
+   intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);
+out_stackarray:
    STACK_ARRAY_FINISH(xe_binds_stackarray);
 
    return ret;
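
A related change brought in by this sync is that drm_xe_wait_user_fence now
takes an exec_queue_id instead of the old num_engines/instances pair, and the
SOFT_OP flag and DRM_XE_UFENCE_WAIT_MASK_* helpers are gone. A minimal sketch
of a wait under the new layout, where the fence address, value, and queue id
are illustrative assumptions rather than part of the patch:

   /* Illustrative only: wait until the u64 user fence at fence_addr equals
    * fence_value, using the new exec_queue_id field. */
   struct drm_xe_wait_user_fence wait = {
      .addr = fence_addr,              /* qword-aligned fence address */
      .op = DRM_XE_UFENCE_WAIT_OP_EQ,
      .flags = 0,                      /* or DRM_XE_UFENCE_WAIT_FLAG_ABSTIME */
      .value = fence_value,
      .mask = 0xffffffffffffffffu,     /* full-u64 compare; MASK_* defines were removed */
      .timeout = INT64_MAX,
      .exec_queue_id = exec_queue_id,
   };
   int ret = intel_ioctl(fd, DRM_IOCTL_XE_WAIT_USER_FENCE, &wait);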
