Am 2022-09-26 um 17:40 schrieb Shashank Sharma:
Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

V3: Create only set() workload UAPI (Christian)

Signed-off-by: Alex Deucher <[email protected]>
Signed-off-by: Shashank Sharma <[email protected]>
---
  include/uapi/drm/amdgpu_drm.h | 17 +++++++++++++++++
  1 file changed, 17 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index c2c9c674a223..23d354242699 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,7 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_OP_QUERY_STATE2    4
  #define AMDGPU_CTX_OP_GET_STABLE_PSTATE       5
  #define AMDGPU_CTX_OP_SET_STABLE_PSTATE       6
+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE     7
/* GPU reset status */
  #define AMDGPU_CTX_NO_RESET           0
@@ -252,6 +253,17 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
  #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+/* GPU workload hints, flag bits 8-15 */
+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT     8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK      (0xff << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

8 bits seems overkill for this. Are we ever going to have 256 different workload types? Maybe 4 bits would be enough. That would allow up to 16 types.


+#define AMDGPU_CTX_WORKLOAD_HINT_NONE      (0 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D        (1 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO     (2 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR        (3 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_MAX      AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)      (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

The macro argument (n) should be wrapped in parentheses. Also, it may be a good idea to apply the AMDGPU_CTX_WORKLOAD_HINT_MASK when extracting the index, in case more flags are added at higher bits in the future:

    (((n) & AMDGPU_CTX_WORKLOAD_HINT_MASK) >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

Regards,
  Felix


+
  struct drm_amdgpu_ctx_in {
        /** AMDGPU_CTX_OP_* */
        __u32   op;
@@ -281,6 +293,11 @@ union drm_amdgpu_ctx_out {
                        __u32   flags;
                        __u32   _pad;
                } pstate;
+
+               struct {
+                       __u32   flags;
+                       __u32   _pad;
+               } workload;
  };
union drm_amdgpu_ctx {

Reply via email to