Mali's CSF firmware triggers the job IRQ whenever there are new firmware
events to process. While this can be a global event (BIT(31) of the
status register), it's usually an event relating to a command stream
group (the other bit indices).

Panthor throws these events onto a workqueue for processing outside the
IRQ handler. It's therefore useful to have an instrumented tracepoint
that goes beyond the generic IRQ tracepoint for this specific case, as
it can be augmented with additional data, namely the events bit mask.

This can then be used to debug problems relating to GPU job events not
being processed quickly enough. The duration_ns field can be used to
work backwards from when the tracepoint fires (at the end of the IRQ
handler) to figure out when the interrupt itself landed, providing not
just information on how long the work queueing took, but also when the
actual interrupt itself arrived.

With this information in hand, the IRQ handler itself being slow can be
excluded as a possible source of problems, and attention can be directed
to the workqueue processing instead.

Signed-off-by: Nicolas Frattaroli <[email protected]>
---
 drivers/gpu/drm/panthor/panthor_fw.c    | 13 +++++++++++++
 drivers/gpu/drm/panthor/panthor_trace.h | 24 ++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_fw.c 
b/drivers/gpu/drm/panthor/panthor_fw.c
index 94a3cd6dfa6d..df07f6435cda 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -26,6 +26,7 @@
 #include "panthor_mmu.h"
 #include "panthor_regs.h"
 #include "panthor_sched.h"
+#include "panthor_trace.h"
 
 #define CSF_FW_NAME "mali_csffw.bin"
 
@@ -1059,6 +1060,12 @@ static void panthor_fw_init_global_iface(struct 
panthor_device *ptdev)
 
 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
 {
+       u32 duration;
+       u64 start;
+
+       if (tracepoint_enabled(gpu_job_irq))
+               start = ktime_get_ns();
+
        gpu_write(ptdev, JOB_INT_CLEAR, status);
 
        if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
@@ -1071,6 +1078,12 @@ static void panthor_job_irq_handler(struct 
panthor_device *ptdev, u32 status)
                return;
 
        panthor_sched_report_fw_events(ptdev, status);
+
+       if (tracepoint_enabled(gpu_job_irq)) {
+               if (check_sub_overflow(ktime_get_ns(), start, &duration))
+                       duration = U32_MAX;
+               trace_gpu_job_irq(status, duration);
+       }
 }
 PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
 
diff --git a/drivers/gpu/drm/panthor/panthor_trace.h 
b/drivers/gpu/drm/panthor/panthor_trace.h
index 01013f81e68a..fcddfdb6ffef 100644
--- a/drivers/gpu/drm/panthor/panthor_trace.h
+++ b/drivers/gpu/drm/panthor/panthor_trace.h
@@ -28,6 +28,30 @@ TRACE_EVENT(gpu_power_active,
        )
 );
 
+/**
+ * gpu_job_irq - called after a job interrupt from firmware completes
+ * @events: bitmask of BIT(CSG id) | BIT(31) for a global event
+ * @duration_ns: ns between job IRQ handler entry and exit, U32_MAX on overflow
+ *
+ * The panthor_job_irq_handler() function instrumented by this tracepoint exits
+ * once it has queued the firmware interrupts for processing, not when the
+ * firmware interrupts are fully processed. This tracepoint allows for debugging
+ * issues with delays in the workqueue's processing of events.
+ */
+TRACE_EVENT(gpu_job_irq,
+       TP_PROTO(u32 events, u32 duration_ns),
+       TP_ARGS(events, duration_ns),
+       TP_STRUCT__entry(
+               __field(u32, events)
+               __field(u32, duration_ns)
+       ),
+       TP_fast_assign(
+               __entry->events         = events;
+               __entry->duration_ns    = duration_ns;
+       ),
+       TP_printk("events=0x%x duration_ns=%u", __entry->events, __entry->duration_ns)
+);
+
 #endif /* __PANTHOR_TRACE_H__ */
 
 #undef TRACE_INCLUDE_PATH

-- 
2.52.0

Reply via email to