On 08/07, Maíra Canal wrote: > This patch exposes the accumulated amount of active time per client > through the fdinfo infrastructure. The amount of active time is exposed > for each V3D queue: BIN, RENDER, CSD, TFU and CACHE_CLEAN. > > In order to calculate the amount of active time per client, a CPU clock > is used through the function local_clock(). The point where the jobs has > started is marked and is finally compared with the time that the job had > finished. > > Moreover, the number of jobs submitted to each queue is also exposed on > fdinfo through the identifier "v3d-jobs-<queue>". > > Co-developed-by: Jose Maria Casanova Crespo <[email protected]> > Signed-off-by: Jose Maria Casanova Crespo <[email protected]> > Signed-off-by: Maíra Canal <[email protected]> > --- > drivers/gpu/drm/v3d/v3d_drv.c | 30 +++++++++++++++++++++++++++++- > drivers/gpu/drm/v3d/v3d_drv.h | 23 +++++++++++++++++++++++ > drivers/gpu/drm/v3d/v3d_gem.c | 1 + > drivers/gpu/drm/v3d/v3d_irq.c | 17 +++++++++++++++++ > drivers/gpu/drm/v3d/v3d_sched.c | 24 ++++++++++++++++++++++++ > 5 files changed, 94 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c > index ffbbe9d527d3..ca65c707da03 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.c > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > @@ -19,6 +19,7 @@ > #include <linux/module.h> > #include <linux/of_platform.h> > #include <linux/platform_device.h> > +#include <linux/sched/clock.h> > #include <linux/reset.h> > > #include <drm/drm_drv.h> > @@ -111,6 +112,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file) > v3d_priv->v3d = v3d; > > for (i = 0; i < V3D_MAX_QUEUES; i++) { > + v3d_priv->enabled_ns[i] = 0; > + v3d_priv->start_ns[i] = 0; > + v3d_priv->jobs_sent[i] = 0; > + > sched = &v3d->queue[i].sched; > drm_sched_entity_init(&v3d_priv->sched_entity[i], > DRM_SCHED_PRIORITY_NORMAL, &sched, > @@ -136,7 +141,29 @@ v3d_postclose(struct drm_device *dev, struct drm_file > *file) > kfree(v3d_priv); 
> } > > -DEFINE_DRM_GEM_FOPS(v3d_drm_fops); > +static void v3d_show_fdinfo(struct drm_printer *p, struct drm_file *file) > +{ > + struct v3d_file_priv *file_priv = file->driver_priv; > + u64 timestamp = local_clock(); > + enum v3d_queue queue; > + > + for (queue = 0; queue < V3D_MAX_QUEUES; queue++) { > + drm_printf(p, "drm-engine-%s: \t%llu ns\n", > + v3d_queue_to_string(queue), > + file_priv->start_ns[queue] ? > file_priv->enabled_ns[queue] > + + timestamp - > file_priv->start_ns[queue] > + : > file_priv->enabled_ns[queue]); > + > + drm_printf(p, "v3d-jobs-%s: \t%llu jobs\n", > + v3d_queue_to_string(queue), > file_priv->jobs_sent[queue]); > + } > +} > + > +static const struct file_operations v3d_drm_fops = { > + .owner = THIS_MODULE, > + DRM_GEM_FOPS, > + .show_fdinfo = drm_show_fdinfo, > +};
I'm not sure where the best place would be, but could you document the
expected counting behavior in case of a GPU reset?
>
> /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
> * protection between clients. Note that render nodes would be
> @@ -176,6 +203,7 @@ static const struct drm_driver v3d_drm_driver = {
> .ioctls = v3d_drm_ioctls,
> .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
> .fops = &v3d_drm_fops,
> + .show_fdinfo = v3d_show_fdinfo,
>
> .name = DRIVER_NAME,
> .desc = DRIVER_DESC,
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index 7f664a4b2a75..7f2897e5b2cb 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -21,6 +21,18 @@ struct reset_control;
>
> #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
>
> +static inline char *v3d_queue_to_string(enum v3d_queue queue)
> +{
> + switch (queue) {
> + case V3D_BIN: return "bin";
> + case V3D_RENDER: return "render";
> + case V3D_TFU: return "tfu";
> + case V3D_CSD: return "csd";
> + case V3D_CACHE_CLEAN: return "cache_clean";
> + }
> + return "UNKNOWN";
> +}
> +
> struct v3d_queue_state {
> struct drm_gpu_scheduler sched;
>
> @@ -167,6 +179,12 @@ struct v3d_file_priv {
> } perfmon;
>
> struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
> +
> + u64 start_ns[V3D_MAX_QUEUES];
> +
> + u64 enabled_ns[V3D_MAX_QUEUES];
> +
> + u64 jobs_sent[V3D_MAX_QUEUES];
> };
>
> struct v3d_bo {
> @@ -238,6 +256,11 @@ struct v3d_job {
> */
> struct v3d_perfmon *perfmon;
>
> + /* File descriptor of the process that submitted the job that could be
> used
> + * for collecting stats by process of GPU usage.
> + */
> + struct drm_file *file;
> +
> /* Callback for the freeing of the job on refcount going to 0. */
> void (*free)(struct kref *ref);
> };
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 2e94ce788c71..40ed0c7c3fad 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file
> *file_priv,
> job = *container;
> job->v3d = v3d;
> job->free = free;
> + job->file = file_priv;
>
> ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
> v3d_priv);
> diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
> index e714d5318f30..c898800ae9c2 100644
> --- a/drivers/gpu/drm/v3d/v3d_irq.c
> +++ b/drivers/gpu/drm/v3d/v3d_irq.c
> @@ -14,6 +14,7 @@
> */
>
> #include <linux/platform_device.h>
> +#include <linux/sched/clock.h>
>
> #include "v3d_drv.h"
> #include "v3d_regs.h"
> @@ -100,6 +101,10 @@ v3d_irq(int irq, void *arg)
> if (intsts & V3D_INT_FLDONE) {
> struct v3d_fence *fence =
> to_v3d_fence(v3d->bin_job->base.irq_fence);
> + struct v3d_file_priv *file =
> v3d->bin_job->base.file->driver_priv;
> +
> + file->enabled_ns[V3D_BIN] += local_clock() -
> file->start_ns[V3D_BIN];
> + file->start_ns[V3D_BIN] = 0;
>
> trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
> dma_fence_signal(&fence->base);
> @@ -109,6 +114,10 @@ v3d_irq(int irq, void *arg)
> if (intsts & V3D_INT_FRDONE) {
> struct v3d_fence *fence =
> to_v3d_fence(v3d->render_job->base.irq_fence);
> + struct v3d_file_priv *file =
> v3d->render_job->base.file->driver_priv;
> +
> + file->enabled_ns[V3D_RENDER] += local_clock() -
> file->start_ns[V3D_RENDER];
> + file->start_ns[V3D_RENDER] = 0;
>
> trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
> dma_fence_signal(&fence->base);
> @@ -118,6 +127,10 @@ v3d_irq(int irq, void *arg)
> if (intsts & V3D_INT_CSDDONE) {
> struct v3d_fence *fence =
> to_v3d_fence(v3d->csd_job->base.irq_fence);
> + struct v3d_file_priv *file =
> v3d->csd_job->base.file->driver_priv;
> +
> + file->enabled_ns[V3D_CSD] += local_clock() -
> file->start_ns[V3D_CSD];
> + file->start_ns[V3D_CSD] = 0;
>
> trace_v3d_csd_irq(&v3d->drm, fence->seqno);
> dma_fence_signal(&fence->base);
> @@ -154,6 +167,10 @@ v3d_hub_irq(int irq, void *arg)
> if (intsts & V3D_HUB_INT_TFUC) {
> struct v3d_fence *fence =
> to_v3d_fence(v3d->tfu_job->base.irq_fence);
> + struct v3d_file_priv *file =
> v3d->tfu_job->base.file->driver_priv;
> +
> + file->enabled_ns[V3D_TFU] += local_clock() -
> file->start_ns[V3D_TFU];
> + file->start_ns[V3D_TFU] = 0;
>
> trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
> dma_fence_signal(&fence->base);
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 06238e6d7f5c..b360709c0765 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -18,6 +18,7 @@
> * semaphores to interlock between them.
> */
>
> +#include <linux/sched/clock.h>
> #include <linux/kthread.h>
>
> #include "v3d_drv.h"
> @@ -76,6 +77,7 @@ static struct dma_fence *v3d_bin_job_run(struct
> drm_sched_job *sched_job)
> {
> struct v3d_bin_job *job = to_bin_job(sched_job);
> struct v3d_dev *v3d = job->base.v3d;
> + struct v3d_file_priv *file = job->base.file->driver_priv;
> struct drm_device *dev = &v3d->drm;
> struct dma_fence *fence;
> unsigned long irqflags;
> @@ -107,6 +109,9 @@ static struct dma_fence *v3d_bin_job_run(struct
> drm_sched_job *sched_job)
> trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
> job->start, job->end);
>
> + file->start_ns[V3D_BIN] = local_clock();
> + file->jobs_sent[V3D_BIN]++;
> +
> v3d_switch_perfmon(v3d, &job->base);
>
> /* Set the current and end address of the control list.
> @@ -131,6 +136,7 @@ static struct dma_fence *v3d_render_job_run(struct
> drm_sched_job *sched_job)
> {
> struct v3d_render_job *job = to_render_job(sched_job);
> struct v3d_dev *v3d = job->base.v3d;
> + struct v3d_file_priv *file = job->base.file->driver_priv;
> struct drm_device *dev = &v3d->drm;
> struct dma_fence *fence;
>
> @@ -158,6 +164,9 @@ static struct dma_fence *v3d_render_job_run(struct
> drm_sched_job *sched_job)
> trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
> job->start, job->end);
>
> + file->start_ns[V3D_RENDER] = local_clock();
> + file->jobs_sent[V3D_RENDER]++;
> +
> v3d_switch_perfmon(v3d, &job->base);
>
> /* XXX: Set the QCFG */
> @@ -176,6 +185,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
> {
> struct v3d_tfu_job *job = to_tfu_job(sched_job);
> struct v3d_dev *v3d = job->base.v3d;
> + struct v3d_file_priv *file = job->base.file->driver_priv;
> struct drm_device *dev = &v3d->drm;
> struct dma_fence *fence;
>
> @@ -190,6 +200,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
>
> trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
>
> + file->start_ns[V3D_TFU] = local_clock();
> + file->jobs_sent[V3D_TFU]++;
> +
> V3D_WRITE(V3D_TFU_IIA, job->args.iia);
> V3D_WRITE(V3D_TFU_IIS, job->args.iis);
> V3D_WRITE(V3D_TFU_ICA, job->args.ica);
> @@ -213,6 +226,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
> {
> struct v3d_csd_job *job = to_csd_job(sched_job);
> struct v3d_dev *v3d = job->base.v3d;
> + struct v3d_file_priv *file = job->base.file->driver_priv;
> struct drm_device *dev = &v3d->drm;
> struct dma_fence *fence;
> int i;
> @@ -231,6 +245,9 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
>
> trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
>
> + file->start_ns[V3D_CSD] = local_clock();
> + file->jobs_sent[V3D_CSD]++;
> +
> v3d_switch_perfmon(v3d, &job->base);
>
> for (i = 1; i <= 6; i++)
> @@ -246,9 +263,16 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
> {
> struct v3d_job *job = to_v3d_job(sched_job);
> struct v3d_dev *v3d = job->v3d;
> + struct v3d_file_priv *file = job->file->driver_priv;
> +
> + file->start_ns[V3D_CACHE_CLEAN] = local_clock();
> + file->jobs_sent[V3D_CACHE_CLEAN]++;
>
> v3d_clean_caches(v3d);
>
> + file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() -
> file->start_ns[V3D_CACHE_CLEAN];
> + file->start_ns[V3D_CACHE_CLEAN] = 0;
> +
> return NULL;
> }
>
> --
> 2.41.0
>
signature.asc
Description: PGP signature
