This patch adds a new AMDGPU_INFO query to the existing info IOCTL that
provides tightly correlated CPU and GPU timestamps for accurate
performance measurements and synchronization between host and device
timelines.

Key improvements:
1. Adds AMDGPU_INFO_CLOCK_COUNTERS query type (0x06)
2. Implements atomic sampling of clocks with:
   - preempt_disable()
   - local IRQ disabling
   - GPU timestamp sampled first (higher latency)
   - CPU timestamps sampled immediately after
3. Provides three correlated clocks:
   - GPU clock counter (ns)
   - CPU raw monotonic time (ns)
   - System boottime (ns)
4. Includes system clock frequency (1GHz) for reference

The implementation addresses the need for precise CPU-GPU
timestamp correlation in ROCm applications, particularly for:
- Performance analysis tools
- Compute pipeline synchronization
- Graphics/compute interoperability
- Low-latency VR/AR applications

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 44 +++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           | 19 +++++++++++
 2 files changed, 63 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 931c52c918c4..8412c88aada9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -595,6 +595,43 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
        return 0;
 }
 
+/**
+ * get_cpu_gpu_counters - Sample the GPU and CPU clocks back to back
+ * @adev: amdgpu device pointer
+ * @args: filled with the sampled counters; gpu_clock_counter is 0 if no hook
+ *
+ * Reads the GPU counter first, then the raw-monotonic and boottime clocks,
+ * with preemption and local IRQs off. NOTE(review): confirm the GPU hook
+ * cannot sleep in this context. Always returns 0.
+ */
+static int get_cpu_gpu_counters(struct amdgpu_device *adev,
+                               struct drm_amdgpu_info_clock_counters *args)
+{
+       unsigned long flags;
+
+       /* Keep this CPU from being preempted or interrupted between samples */
+       preempt_disable();
+       local_irq_save(flags);
+
+       if (adev->gfx.funcs->get_gpu_clock_counter)
+               args->gpu_clock_counter = 
adev->gfx.funcs->get_gpu_clock_counter(adev);
+       else
+               args->gpu_clock_counter = 0;
+
+       /* No access to rdtsc, so the raw monotonic clock is used instead */
+       args->cpu_clock_counter = ktime_get_raw_ns();
+       args->system_clock_counter = ktime_get_boottime_ns();
+
+       /* The CPU counters above are in nanoseconds, hence a 1GHz frequency */
+       args->system_clock_freq = 1000000000;
+
+       /* Restore the saved IRQ state, then re-enable preemption */
+       local_irq_restore(flags);
+       preempt_enable();
+
+       return 0;
+}
+
 /*
  * Userspace get information ioctl
  */
@@ -734,6 +771,13 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
        case AMDGPU_INFO_TIMESTAMP:
                ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
                return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+       case AMDGPU_INFO_CLOCK_COUNTERS: {
+               struct drm_amdgpu_info_clock_counters counters;
+
+                memset(&counters, 0, sizeof(counters));
+               get_cpu_gpu_counters(adev, &counters);
+               return copy_to_user(out, &counters, min(size, 
sizeof(counters))) ? -EFAULT : 0;
+       }
        case AMDGPU_INFO_FW_VERSION: {
                struct drm_amdgpu_info_firmware fw_info;
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index bdedbaccf776..e8adbbd076ca 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1056,6 +1056,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
 #define AMDGPU_INFO_HW_IP_COUNT                        0x03
 /* timestamp for GL_ARB_timer_query */
 #define AMDGPU_INFO_TIMESTAMP                  0x05
+/* get synchronized CPU and GPU clock counters */
+#define AMDGPU_INFO_CLOCK_COUNTERS             0x06
 /* Query the firmware version */
 #define AMDGPU_INFO_FW_VERSION                 0x0e
        /* Subquery id: Query VCE firmware version */
@@ -1598,6 +1600,23 @@ struct drm_amdgpu_info_uq_metadata {
        };
 };
 
+/**
+ * struct drm_amdgpu_info_clock_counters - Correlated CPU/GPU clock samples
+ *
+ * Returned by the AMDGPU_INFO_CLOCK_COUNTERS query so userspace can relate
+ * GPU timestamps to CPU time. The two CPU-side counters are in nanoseconds.
+ */
+struct drm_amdgpu_info_clock_counters {
+       /* GPU clock counter; NOTE(review): confirm the unit is nanoseconds */
+       __u64 gpu_clock_counter;
+       /* CPU raw monotonic clock in nanoseconds */
+       __u64 cpu_clock_counter;
+       /* System boottime clock in nanoseconds */
+       __u64 system_clock_counter;
+       /* Frequency of the system clock counters in Hz (set to 1 GHz) */
+       __u64 system_clock_freq;
+};
+
 /*
  * Supported GPU families
  */
-- 
2.34.1

Reply via email to